{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f0e75b21",
   "metadata": {},
   "source": [
    "[代码参考链接 - Multi-Label Classification with Deep Learning](https://machinelearningmastery.com/multi-label-classification-with-deep-learning/) <br>\n",
    "[代码参考链接 - kaggle - Arunkr - MOA:simple Model](https://www.kaggle.com/arunkr1/moa-simple-model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "b8604bc4",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import scipy\n",
    "from scipy.optimize import curve_fit\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f3422d8f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['training', 'test']\n",
      "(1974, 6)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SMILES</th>\n",
       "      <th>Caco-2</th>\n",
       "      <th>CYP3A4</th>\n",
       "      <th>hERG</th>\n",
       "      <th>HOB</th>\n",
       "      <th>MN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1859</th>\n",
       "      <td>CN(C)CCOc1ccc(OC2=C(C(=O)Oc3cc(O)ccc23)c4ccc(F...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1207</th>\n",
       "      <td>Cc1ccc(cc1)S(=O)(=O)Cc2oc(cc2)C(=O)O</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1510</th>\n",
       "      <td>Cc1cc(OC(F)(F)F)ccc1C2=C(Cc3ccc(\\C=C\\C(=O)O)cc...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>CN(C[C@@H]1C[C@@H]1c2ccccc2)c3nc(NCCc4ccc(O)cc...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1573</th>\n",
       "      <td>C[C@@H](CO)CN1[C@H](C)Cc2c([nH]c3ccccc23)[C@H]...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 SMILES  Caco-2  CYP3A4  hERG  \\\n",
       "1859  CN(C)CCOc1ccc(OC2=C(C(=O)Oc3cc(O)ccc23)c4ccc(F...       0       1     0   \n",
       "1207               Cc1ccc(cc1)S(=O)(=O)Cc2oc(cc2)C(=O)O       1       0     0   \n",
       "1510  Cc1cc(OC(F)(F)F)ccc1C2=C(Cc3ccc(\\C=C\\C(=O)O)cc...       0       1     0   \n",
       "52    CN(C[C@@H]1C[C@@H]1c2ccccc2)c3nc(NCCc4ccc(O)cc...       0       1     1   \n",
       "1573  C[C@@H](CO)CN1[C@H](C)Cc2c([nH]c3ccccc23)[C@H]...       0       1     1   \n",
       "\n",
       "      HOB  MN  \n",
       "1859    1   0  \n",
       "1207    1   1  \n",
       "1510    0   1  \n",
       "52      0   1  \n",
       "1573    0   1  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataADMET_xls = pd.ExcelFile('../ADMET.xlsx')\n",
    "sheets = dataADMET_xls.sheet_names\n",
    "print(sheets)\n",
    "\n",
    "dataADMET_Train = pd.read_excel('../ADMET.xlsx', sheet_name=sheets[0])\n",
    "print(dataADMET_Train.shape)\n",
    "dataADMET_Train.sample(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "f029e16b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['training', 'test']\n",
      "(1974, 730)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SMILES</th>\n",
       "      <th>nAcid</th>\n",
       "      <th>ALogP</th>\n",
       "      <th>ALogp2</th>\n",
       "      <th>AMR</th>\n",
       "      <th>apol</th>\n",
       "      <th>naAromAtom</th>\n",
       "      <th>nAromBond</th>\n",
       "      <th>nAtom</th>\n",
       "      <th>nHeavyAtom</th>\n",
       "      <th>...</th>\n",
       "      <th>MW</th>\n",
       "      <th>WTPT-1</th>\n",
       "      <th>WTPT-2</th>\n",
       "      <th>WTPT-3</th>\n",
       "      <th>WTPT-4</th>\n",
       "      <th>WTPT-5</th>\n",
       "      <th>WPATH</th>\n",
       "      <th>WPOL</th>\n",
       "      <th>XLogP</th>\n",
       "      <th>Zagreb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1740</th>\n",
       "      <td>CC1(N(CCc2cc(O)ccc12)c3ccc(F)cc3)c4ccc(\\C=C\\c5...</td>\n",
       "      <td>0</td>\n",
       "      <td>1.6299</td>\n",
       "      <td>2.656574</td>\n",
       "      <td>138.4346</td>\n",
       "      <td>68.182032</td>\n",
       "      <td>23</td>\n",
       "      <td>23</td>\n",
       "      <td>56</td>\n",
       "      <td>32</td>\n",
       "      <td>...</td>\n",
       "      <td>425.190341</td>\n",
       "      <td>66.362014</td>\n",
       "      <td>2.073813</td>\n",
       "      <td>14.621298</td>\n",
       "      <td>2.547916</td>\n",
       "      <td>9.547233</td>\n",
       "      <td>2946</td>\n",
       "      <td>55</td>\n",
       "      <td>3.319</td>\n",
       "      <td>176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>500</th>\n",
       "      <td>Cc1c(O)cc(C#N)c2cc(ccc12)c3ccc(O)c(F)c3</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0291</td>\n",
       "      <td>4.117247</td>\n",
       "      <td>90.2677</td>\n",
       "      <td>42.942516</td>\n",
       "      <td>16</td>\n",
       "      <td>18</td>\n",
       "      <td>34</td>\n",
       "      <td>22</td>\n",
       "      <td>...</td>\n",
       "      <td>293.085207</td>\n",
       "      <td>44.717246</td>\n",
       "      <td>2.032602</td>\n",
       "      <td>10.063181</td>\n",
       "      <td>5.051402</td>\n",
       "      <td>2.487058</td>\n",
       "      <td>1027</td>\n",
       "      <td>39</td>\n",
       "      <td>2.304</td>\n",
       "      <td>118</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>452</th>\n",
       "      <td>CCSC1=C(C(=O)c2ccc(O)cc12)c3ccc(O)cc3</td>\n",
       "      <td>0</td>\n",
       "      <td>1.5987</td>\n",
       "      <td>2.555842</td>\n",
       "      <td>93.0185</td>\n",
       "      <td>44.561102</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>35</td>\n",
       "      <td>21</td>\n",
       "      <td>...</td>\n",
       "      <td>298.066365</td>\n",
       "      <td>42.841559</td>\n",
       "      <td>2.040074</td>\n",
       "      <td>10.665630</td>\n",
       "      <td>7.684233</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>860</td>\n",
       "      <td>35</td>\n",
       "      <td>1.919</td>\n",
       "      <td>112</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1790</th>\n",
       "      <td>NC(=O)\\C=C\\c1ccc(Oc2c(sc3cc(O)ccc23)c4ccc(O)cc...</td>\n",
       "      <td>0</td>\n",
       "      <td>1.3406</td>\n",
       "      <td>1.797208</td>\n",
       "      <td>127.1451</td>\n",
       "      <td>59.023481</td>\n",
       "      <td>21</td>\n",
       "      <td>23</td>\n",
       "      <td>46</td>\n",
       "      <td>29</td>\n",
       "      <td>...</td>\n",
       "      <td>403.087829</td>\n",
       "      <td>59.634945</td>\n",
       "      <td>2.056377</td>\n",
       "      <td>16.270276</td>\n",
       "      <td>10.671094</td>\n",
       "      <td>2.391176</td>\n",
       "      <td>2385</td>\n",
       "      <td>44</td>\n",
       "      <td>1.792</td>\n",
       "      <td>154</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>775</th>\n",
       "      <td>CCCCC12CCC(=O)C(=C1c3ccc4[nH]ncc4c3C2)Br</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0976</td>\n",
       "      <td>0.009526</td>\n",
       "      <td>92.6763</td>\n",
       "      <td>50.401067</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>41</td>\n",
       "      <td>22</td>\n",
       "      <td>...</td>\n",
       "      <td>358.068075</td>\n",
       "      <td>45.732601</td>\n",
       "      <td>2.078755</td>\n",
       "      <td>11.278514</td>\n",
       "      <td>2.539437</td>\n",
       "      <td>6.165827</td>\n",
       "      <td>915</td>\n",
       "      <td>41</td>\n",
       "      <td>3.995</td>\n",
       "      <td>126</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 730 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 SMILES  nAcid   ALogP  \\\n",
       "1740  CC1(N(CCc2cc(O)ccc12)c3ccc(F)cc3)c4ccc(\\C=C\\c5...      0  1.6299   \n",
       "500             Cc1c(O)cc(C#N)c2cc(ccc12)c3ccc(O)c(F)c3      0  2.0291   \n",
       "452               CCSC1=C(C(=O)c2ccc(O)cc12)c3ccc(O)cc3      0  1.5987   \n",
       "1790  NC(=O)\\C=C\\c1ccc(Oc2c(sc3cc(O)ccc23)c4ccc(O)cc...      0  1.3406   \n",
       "775            CCCCC12CCC(=O)C(=C1c3ccc4[nH]ncc4c3C2)Br      0  0.0976   \n",
       "\n",
       "        ALogp2       AMR       apol  naAromAtom  nAromBond  nAtom  nHeavyAtom  \\\n",
       "1740  2.656574  138.4346  68.182032          23         23     56          32   \n",
       "500   4.117247   90.2677  42.942516          16         18     34          22   \n",
       "452   2.555842   93.0185  44.561102          12         12     35          21   \n",
       "1790  1.797208  127.1451  59.023481          21         23     46          29   \n",
       "775   0.009526   92.6763  50.401067           9         10     41          22   \n",
       "\n",
       "      ...          MW     WTPT-1    WTPT-2     WTPT-3     WTPT-4    WTPT-5  \\\n",
       "1740  ...  425.190341  66.362014  2.073813  14.621298   2.547916  9.547233   \n",
       "500   ...  293.085207  44.717246  2.032602  10.063181   5.051402  2.487058   \n",
       "452   ...  298.066365  42.841559  2.040074  10.665630   7.684233  0.000000   \n",
       "1790  ...  403.087829  59.634945  2.056377  16.270276  10.671094  2.391176   \n",
       "775   ...  358.068075  45.732601  2.078755  11.278514   2.539437  6.165827   \n",
       "\n",
       "      WPATH  WPOL  XLogP  Zagreb  \n",
       "1740   2946    55  3.319     176  \n",
       "500    1027    39  2.304     118  \n",
       "452     860    35  1.919     112  \n",
       "1790   2385    44  1.792     154  \n",
       "775     915    41  3.995     126  \n",
       "\n",
       "[5 rows x 730 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataMol_xls = pd.ExcelFile('../Molecular_Descriptor.xlsx')\n",
    "sheets2 = dataMol_xls.sheet_names\n",
    "print(sheets2)\n",
    "\n",
    "dataMol_Train = pd.read_excel('../Molecular_Descriptor.xlsx', sheet_name=sheets2[0])\n",
    "print(dataMol_Train.shape)\n",
    "dataMol_Train.sample(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "10510539",
   "metadata": {},
   "source": [
    "### 将ADMET, Molecular 表格合并"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e671c092",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SMILES</th>\n",
       "      <th>Caco-2</th>\n",
       "      <th>CYP3A4</th>\n",
       "      <th>hERG</th>\n",
       "      <th>HOB</th>\n",
       "      <th>MN</th>\n",
       "      <th>nAcid</th>\n",
       "      <th>ALogP</th>\n",
       "      <th>ALogp2</th>\n",
       "      <th>AMR</th>\n",
       "      <th>...</th>\n",
       "      <th>MW</th>\n",
       "      <th>WTPT-1</th>\n",
       "      <th>WTPT-2</th>\n",
       "      <th>WTPT-3</th>\n",
       "      <th>WTPT-4</th>\n",
       "      <th>WTPT-5</th>\n",
       "      <th>WPATH</th>\n",
       "      <th>WPOL</th>\n",
       "      <th>XLogP</th>\n",
       "      <th>Zagreb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Oc1ccc2O[C@H]([C@H](Sc2c1)C3CCCC3)c4ccc(OCCN5C...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.2860</td>\n",
       "      <td>0.081796</td>\n",
       "      <td>126.1188</td>\n",
       "      <td>...</td>\n",
       "      <td>439.218115</td>\n",
       "      <td>64.771680</td>\n",
       "      <td>2.089409</td>\n",
       "      <td>15.471445</td>\n",
       "      <td>8.858910</td>\n",
       "      <td>3.406628</td>\n",
       "      <td>3011</td>\n",
       "      <td>47</td>\n",
       "      <td>4.666</td>\n",
       "      <td>166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Oc1ccc2O[C@H]([C@H](Sc2c1)C3CCCCCC3)c4ccc(OCCN...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.8620</td>\n",
       "      <td>0.743044</td>\n",
       "      <td>131.9420</td>\n",
       "      <td>...</td>\n",
       "      <td>467.249415</td>\n",
       "      <td>68.960024</td>\n",
       "      <td>2.089698</td>\n",
       "      <td>15.486947</td>\n",
       "      <td>8.863774</td>\n",
       "      <td>3.406648</td>\n",
       "      <td>3516</td>\n",
       "      <td>54</td>\n",
       "      <td>5.804</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Oc1ccc(cc1)[C@H]2Sc3cc(O)ccc3O[C@H]2c4ccc(OCCN...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.7296</td>\n",
       "      <td>0.532316</td>\n",
       "      <td>139.9304</td>\n",
       "      <td>...</td>\n",
       "      <td>463.181729</td>\n",
       "      <td>68.748923</td>\n",
       "      <td>2.083301</td>\n",
       "      <td>18.011114</td>\n",
       "      <td>11.390412</td>\n",
       "      <td>3.406644</td>\n",
       "      <td>3542</td>\n",
       "      <td>52</td>\n",
       "      <td>2.964</td>\n",
       "      <td>176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Oc1ccc2O[C@H]([C@@H](CC3CCCCC3)Sc2c1)c4ccc(OCC...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.3184</td>\n",
       "      <td>0.101379</td>\n",
       "      <td>133.4822</td>\n",
       "      <td>...</td>\n",
       "      <td>467.249415</td>\n",
       "      <td>68.883696</td>\n",
       "      <td>2.087385</td>\n",
       "      <td>15.468365</td>\n",
       "      <td>8.857943</td>\n",
       "      <td>3.406624</td>\n",
       "      <td>3594</td>\n",
       "      <td>50</td>\n",
       "      <td>6.015</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Oc1ccc2O[C@H]([C@@H](Cc3ccccc3)Sc2c1)c4ccc(OCC...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.3551</td>\n",
       "      <td>1.836296</td>\n",
       "      <td>143.1903</td>\n",
       "      <td>...</td>\n",
       "      <td>461.202465</td>\n",
       "      <td>68.883696</td>\n",
       "      <td>2.087385</td>\n",
       "      <td>15.468365</td>\n",
       "      <td>8.857943</td>\n",
       "      <td>3.406624</td>\n",
       "      <td>3594</td>\n",
       "      <td>50</td>\n",
       "      <td>4.462</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 735 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              SMILES  Caco-2  CYP3A4  hERG  \\\n",
       "0  Oc1ccc2O[C@H]([C@H](Sc2c1)C3CCCC3)c4ccc(OCCN5C...       0       1     1   \n",
       "1  Oc1ccc2O[C@H]([C@H](Sc2c1)C3CCCCCC3)c4ccc(OCCN...       0       1     1   \n",
       "2  Oc1ccc(cc1)[C@H]2Sc3cc(O)ccc3O[C@H]2c4ccc(OCCN...       0       1     1   \n",
       "3  Oc1ccc2O[C@H]([C@@H](CC3CCCCC3)Sc2c1)c4ccc(OCC...       0       1     1   \n",
       "4  Oc1ccc2O[C@H]([C@@H](Cc3ccccc3)Sc2c1)c4ccc(OCC...       0       1     1   \n",
       "\n",
       "   HOB  MN  nAcid   ALogP    ALogp2       AMR  ...          MW     WTPT-1  \\\n",
       "0    0   0      0 -0.2860  0.081796  126.1188  ...  439.218115  64.771680   \n",
       "1    0   0      0 -0.8620  0.743044  131.9420  ...  467.249415  68.960024   \n",
       "2    0   1      0  0.7296  0.532316  139.9304  ...  463.181729  68.748923   \n",
       "3    0   0      0 -0.3184  0.101379  133.4822  ...  467.249415  68.883696   \n",
       "4    0   0      0  1.3551  1.836296  143.1903  ...  461.202465  68.883696   \n",
       "\n",
       "     WTPT-2     WTPT-3     WTPT-4    WTPT-5  WPATH  WPOL  XLogP  Zagreb  \n",
       "0  2.089409  15.471445   8.858910  3.406628   3011    47  4.666     166  \n",
       "1  2.089698  15.486947   8.863774  3.406648   3516    54  5.804     174  \n",
       "2  2.083301  18.011114  11.390412  3.406644   3542    52  2.964     176  \n",
       "3  2.087385  15.468365   8.857943  3.406624   3594    50  6.015     174  \n",
       "4  2.087385  15.468365   8.857943  3.406624   3594    50  4.462     174  \n",
       "\n",
       "[5 rows x 735 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_train = pd.merge(dataADMET_Train,dataMol_Train,  on='SMILES')\n",
    "data_train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "00a1539a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Caco-2</th>\n",
       "      <th>CYP3A4</th>\n",
       "      <th>hERG</th>\n",
       "      <th>HOB</th>\n",
       "      <th>MN</th>\n",
       "      <th>nAcid</th>\n",
       "      <th>ALogP</th>\n",
       "      <th>ALogp2</th>\n",
       "      <th>AMR</th>\n",
       "      <th>apol</th>\n",
       "      <th>...</th>\n",
       "      <th>MW</th>\n",
       "      <th>WTPT-1</th>\n",
       "      <th>WTPT-2</th>\n",
       "      <th>WTPT-3</th>\n",
       "      <th>WTPT-4</th>\n",
       "      <th>WTPT-5</th>\n",
       "      <th>WPATH</th>\n",
       "      <th>WPOL</th>\n",
       "      <th>XLogP</th>\n",
       "      <th>Zagreb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.2860</td>\n",
       "      <td>0.081796</td>\n",
       "      <td>126.1188</td>\n",
       "      <td>74.170169</td>\n",
       "      <td>...</td>\n",
       "      <td>439.218115</td>\n",
       "      <td>64.771680</td>\n",
       "      <td>2.089409</td>\n",
       "      <td>15.471445</td>\n",
       "      <td>8.858910</td>\n",
       "      <td>3.406628</td>\n",
       "      <td>3011</td>\n",
       "      <td>47</td>\n",
       "      <td>4.666</td>\n",
       "      <td>166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.8620</td>\n",
       "      <td>0.743044</td>\n",
       "      <td>131.9420</td>\n",
       "      <td>80.357341</td>\n",
       "      <td>...</td>\n",
       "      <td>467.249415</td>\n",
       "      <td>68.960024</td>\n",
       "      <td>2.089698</td>\n",
       "      <td>15.486947</td>\n",
       "      <td>8.863774</td>\n",
       "      <td>3.406648</td>\n",
       "      <td>3516</td>\n",
       "      <td>54</td>\n",
       "      <td>5.804</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.7296</td>\n",
       "      <td>0.532316</td>\n",
       "      <td>139.9304</td>\n",
       "      <td>74.064997</td>\n",
       "      <td>...</td>\n",
       "      <td>463.181729</td>\n",
       "      <td>68.748923</td>\n",
       "      <td>2.083301</td>\n",
       "      <td>18.011114</td>\n",
       "      <td>11.390412</td>\n",
       "      <td>3.406644</td>\n",
       "      <td>3542</td>\n",
       "      <td>52</td>\n",
       "      <td>2.964</td>\n",
       "      <td>176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.3184</td>\n",
       "      <td>0.101379</td>\n",
       "      <td>133.4822</td>\n",
       "      <td>80.357341</td>\n",
       "      <td>...</td>\n",
       "      <td>467.249415</td>\n",
       "      <td>68.883696</td>\n",
       "      <td>2.087385</td>\n",
       "      <td>15.468365</td>\n",
       "      <td>8.857943</td>\n",
       "      <td>3.406624</td>\n",
       "      <td>3594</td>\n",
       "      <td>50</td>\n",
       "      <td>6.015</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.3551</td>\n",
       "      <td>1.836296</td>\n",
       "      <td>143.1903</td>\n",
       "      <td>76.356583</td>\n",
       "      <td>...</td>\n",
       "      <td>461.202465</td>\n",
       "      <td>68.883696</td>\n",
       "      <td>2.087385</td>\n",
       "      <td>15.468365</td>\n",
       "      <td>8.857943</td>\n",
       "      <td>3.406624</td>\n",
       "      <td>3594</td>\n",
       "      <td>50</td>\n",
       "      <td>4.462</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1969</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1.8193</td>\n",
       "      <td>3.309852</td>\n",
       "      <td>177.6817</td>\n",
       "      <td>89.159790</td>\n",
       "      <td>...</td>\n",
       "      <td>598.166139</td>\n",
       "      <td>88.709996</td>\n",
       "      <td>2.063023</td>\n",
       "      <td>25.470481</td>\n",
       "      <td>21.946991</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>7121</td>\n",
       "      <td>70</td>\n",
       "      <td>2.526</td>\n",
       "      <td>236</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1970</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1.6903</td>\n",
       "      <td>2.857114</td>\n",
       "      <td>167.6057</td>\n",
       "      <td>82.972618</td>\n",
       "      <td>...</td>\n",
       "      <td>570.134839</td>\n",
       "      <td>84.662088</td>\n",
       "      <td>2.064929</td>\n",
       "      <td>24.928962</td>\n",
       "      <td>21.405589</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6171</td>\n",
       "      <td>66</td>\n",
       "      <td>1.884</td>\n",
       "      <td>228</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1971</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1.6903</td>\n",
       "      <td>2.857114</td>\n",
       "      <td>167.6057</td>\n",
       "      <td>82.972618</td>\n",
       "      <td>...</td>\n",
       "      <td>570.134839</td>\n",
       "      <td>84.660642</td>\n",
       "      <td>2.064894</td>\n",
       "      <td>24.923083</td>\n",
       "      <td>21.400883</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6421</td>\n",
       "      <td>66</td>\n",
       "      <td>1.884</td>\n",
       "      <td>228</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1972</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1.3365</td>\n",
       "      <td>1.786232</td>\n",
       "      <td>125.5605</td>\n",
       "      <td>63.287860</td>\n",
       "      <td>...</td>\n",
       "      <td>436.098059</td>\n",
       "      <td>64.171346</td>\n",
       "      <td>2.070043</td>\n",
       "      <td>19.841924</td>\n",
       "      <td>16.326873</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2583</td>\n",
       "      <td>50</td>\n",
       "      <td>0.782</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1973</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1.8193</td>\n",
       "      <td>3.309852</td>\n",
       "      <td>177.6817</td>\n",
       "      <td>89.159790</td>\n",
       "      <td>...</td>\n",
       "      <td>598.166139</td>\n",
       "      <td>88.708522</td>\n",
       "      <td>2.062989</td>\n",
       "      <td>25.464529</td>\n",
       "      <td>21.942236</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>7421</td>\n",
       "      <td>70</td>\n",
       "      <td>2.526</td>\n",
       "      <td>236</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1974 rows × 734 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      Caco-2  CYP3A4  hERG  HOB  MN  nAcid   ALogP    ALogp2       AMR  \\\n",
       "0          0       1     1    0   0      0 -0.2860  0.081796  126.1188   \n",
       "1          0       1     1    0   0      0 -0.8620  0.743044  131.9420   \n",
       "2          0       1     1    0   1      0  0.7296  0.532316  139.9304   \n",
       "3          0       1     1    0   0      0 -0.3184  0.101379  133.4822   \n",
       "4          0       1     1    0   0      0  1.3551  1.836296  143.1903   \n",
       "...      ...     ...   ...  ...  ..    ...     ...       ...       ...   \n",
       "1969       0       1     1    0   1      0  1.8193  3.309852  177.6817   \n",
       "1970       0       1     1    0   1      0  1.6903  2.857114  167.6057   \n",
       "1971       0       1     0    0   1      0  1.6903  2.857114  167.6057   \n",
       "1972       0       1     0    0   1      0  1.3365  1.786232  125.5605   \n",
       "1973       0       1     1    0   1      0  1.8193  3.309852  177.6817   \n",
       "\n",
       "           apol  ...          MW     WTPT-1    WTPT-2     WTPT-3     WTPT-4  \\\n",
       "0     74.170169  ...  439.218115  64.771680  2.089409  15.471445   8.858910   \n",
       "1     80.357341  ...  467.249415  68.960024  2.089698  15.486947   8.863774   \n",
       "2     74.064997  ...  463.181729  68.748923  2.083301  18.011114  11.390412   \n",
       "3     80.357341  ...  467.249415  68.883696  2.087385  15.468365   8.857943   \n",
       "4     76.356583  ...  461.202465  68.883696  2.087385  15.468365   8.857943   \n",
       "...         ...  ...         ...        ...       ...        ...        ...   \n",
       "1969  89.159790  ...  598.166139  88.709996  2.063023  25.470481  21.946991   \n",
       "1970  82.972618  ...  570.134839  84.662088  2.064929  24.928962  21.405589   \n",
       "1971  82.972618  ...  570.134839  84.660642  2.064894  24.923083  21.400883   \n",
       "1972  63.287860  ...  436.098059  64.171346  2.070043  19.841924  16.326873   \n",
       "1973  89.159790  ...  598.166139  88.708522  2.062989  25.464529  21.942236   \n",
       "\n",
       "        WTPT-5  WPATH  WPOL  XLogP  Zagreb  \n",
       "0     3.406628   3011    47  4.666     166  \n",
       "1     3.406648   3516    54  5.804     174  \n",
       "2     3.406644   3542    52  2.964     176  \n",
       "3     3.406624   3594    50  6.015     174  \n",
       "4     3.406624   3594    50  4.462     174  \n",
       "...        ...    ...   ...    ...     ...  \n",
       "1969  0.000000   7121    70  2.526     236  \n",
       "1970  0.000000   6171    66  1.884     228  \n",
       "1971  0.000000   6421    66  1.884     228  \n",
       "1972  0.000000   2583    50  0.782     174  \n",
       "1973  0.000000   7421    70  2.526     236  \n",
       "\n",
       "[1974 rows x 734 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_train = data_train[data_train.columns[1:].tolist()]\n",
    "data_train "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "aa835664",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>nAcid</th>\n",
       "      <th>ALogP</th>\n",
       "      <th>ALogp2</th>\n",
       "      <th>AMR</th>\n",
       "      <th>apol</th>\n",
       "      <th>naAromAtom</th>\n",
       "      <th>nAromBond</th>\n",
       "      <th>nAtom</th>\n",
       "      <th>nHeavyAtom</th>\n",
       "      <th>nH</th>\n",
       "      <th>...</th>\n",
       "      <th>MW</th>\n",
       "      <th>WTPT-1</th>\n",
       "      <th>WTPT-2</th>\n",
       "      <th>WTPT-3</th>\n",
       "      <th>WTPT-4</th>\n",
       "      <th>WTPT-5</th>\n",
       "      <th>WPATH</th>\n",
       "      <th>WPOL</th>\n",
       "      <th>XLogP</th>\n",
       "      <th>Zagreb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>-0.2860</td>\n",
       "      <td>0.081796</td>\n",
       "      <td>126.1188</td>\n",
       "      <td>74.170169</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>64</td>\n",
       "      <td>31</td>\n",
       "      <td>33</td>\n",
       "      <td>...</td>\n",
       "      <td>439.218115</td>\n",
       "      <td>64.771680</td>\n",
       "      <td>2.089409</td>\n",
       "      <td>15.471445</td>\n",
       "      <td>8.858910</td>\n",
       "      <td>3.406628</td>\n",
       "      <td>3011</td>\n",
       "      <td>47</td>\n",
       "      <td>4.666</td>\n",
       "      <td>166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>-0.8620</td>\n",
       "      <td>0.743044</td>\n",
       "      <td>131.9420</td>\n",
       "      <td>80.357341</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>70</td>\n",
       "      <td>33</td>\n",
       "      <td>37</td>\n",
       "      <td>...</td>\n",
       "      <td>467.249415</td>\n",
       "      <td>68.960024</td>\n",
       "      <td>2.089698</td>\n",
       "      <td>15.486947</td>\n",
       "      <td>8.863774</td>\n",
       "      <td>3.406648</td>\n",
       "      <td>3516</td>\n",
       "      <td>54</td>\n",
       "      <td>5.804</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0.7296</td>\n",
       "      <td>0.532316</td>\n",
       "      <td>139.9304</td>\n",
       "      <td>74.064997</td>\n",
       "      <td>18</td>\n",
       "      <td>18</td>\n",
       "      <td>62</td>\n",
       "      <td>33</td>\n",
       "      <td>29</td>\n",
       "      <td>...</td>\n",
       "      <td>463.181729</td>\n",
       "      <td>68.748923</td>\n",
       "      <td>2.083301</td>\n",
       "      <td>18.011114</td>\n",
       "      <td>11.390412</td>\n",
       "      <td>3.406644</td>\n",
       "      <td>3542</td>\n",
       "      <td>52</td>\n",
       "      <td>2.964</td>\n",
       "      <td>176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>-0.3184</td>\n",
       "      <td>0.101379</td>\n",
       "      <td>133.4822</td>\n",
       "      <td>80.357341</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>70</td>\n",
       "      <td>33</td>\n",
       "      <td>37</td>\n",
       "      <td>...</td>\n",
       "      <td>467.249415</td>\n",
       "      <td>68.883696</td>\n",
       "      <td>2.087385</td>\n",
       "      <td>15.468365</td>\n",
       "      <td>8.857943</td>\n",
       "      <td>3.406624</td>\n",
       "      <td>3594</td>\n",
       "      <td>50</td>\n",
       "      <td>6.015</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>1.3551</td>\n",
       "      <td>1.836296</td>\n",
       "      <td>143.1903</td>\n",
       "      <td>76.356583</td>\n",
       "      <td>18</td>\n",
       "      <td>18</td>\n",
       "      <td>64</td>\n",
       "      <td>33</td>\n",
       "      <td>31</td>\n",
       "      <td>...</td>\n",
       "      <td>461.202465</td>\n",
       "      <td>68.883696</td>\n",
       "      <td>2.087385</td>\n",
       "      <td>15.468365</td>\n",
       "      <td>8.857943</td>\n",
       "      <td>3.406624</td>\n",
       "      <td>3594</td>\n",
       "      <td>50</td>\n",
       "      <td>4.462</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1969</th>\n",
       "      <td>0</td>\n",
       "      <td>1.8193</td>\n",
       "      <td>3.309852</td>\n",
       "      <td>177.6817</td>\n",
       "      <td>89.159790</td>\n",
       "      <td>24</td>\n",
       "      <td>24</td>\n",
       "      <td>73</td>\n",
       "      <td>43</td>\n",
       "      <td>30</td>\n",
       "      <td>...</td>\n",
       "      <td>598.166139</td>\n",
       "      <td>88.709996</td>\n",
       "      <td>2.063023</td>\n",
       "      <td>25.470481</td>\n",
       "      <td>21.946991</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>7121</td>\n",
       "      <td>70</td>\n",
       "      <td>2.526</td>\n",
       "      <td>236</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1970</th>\n",
       "      <td>0</td>\n",
       "      <td>1.6903</td>\n",
       "      <td>2.857114</td>\n",
       "      <td>167.6057</td>\n",
       "      <td>82.972618</td>\n",
       "      <td>24</td>\n",
       "      <td>24</td>\n",
       "      <td>67</td>\n",
       "      <td>41</td>\n",
       "      <td>26</td>\n",
       "      <td>...</td>\n",
       "      <td>570.134839</td>\n",
       "      <td>84.662088</td>\n",
       "      <td>2.064929</td>\n",
       "      <td>24.928962</td>\n",
       "      <td>21.405589</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6171</td>\n",
       "      <td>66</td>\n",
       "      <td>1.884</td>\n",
       "      <td>228</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1971</th>\n",
       "      <td>0</td>\n",
       "      <td>1.6903</td>\n",
       "      <td>2.857114</td>\n",
       "      <td>167.6057</td>\n",
       "      <td>82.972618</td>\n",
       "      <td>24</td>\n",
       "      <td>24</td>\n",
       "      <td>67</td>\n",
       "      <td>41</td>\n",
       "      <td>26</td>\n",
       "      <td>...</td>\n",
       "      <td>570.134839</td>\n",
       "      <td>84.660642</td>\n",
       "      <td>2.064894</td>\n",
       "      <td>24.923083</td>\n",
       "      <td>21.400883</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6421</td>\n",
       "      <td>66</td>\n",
       "      <td>1.884</td>\n",
       "      <td>228</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1972</th>\n",
       "      <td>0</td>\n",
       "      <td>1.3365</td>\n",
       "      <td>1.786232</td>\n",
       "      <td>125.5605</td>\n",
       "      <td>63.287860</td>\n",
       "      <td>18</td>\n",
       "      <td>18</td>\n",
       "      <td>51</td>\n",
       "      <td>31</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>436.098059</td>\n",
       "      <td>64.171346</td>\n",
       "      <td>2.070043</td>\n",
       "      <td>19.841924</td>\n",
       "      <td>16.326873</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2583</td>\n",
       "      <td>50</td>\n",
       "      <td>0.782</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1973</th>\n",
       "      <td>0</td>\n",
       "      <td>1.8193</td>\n",
       "      <td>3.309852</td>\n",
       "      <td>177.6817</td>\n",
       "      <td>89.159790</td>\n",
       "      <td>24</td>\n",
       "      <td>24</td>\n",
       "      <td>73</td>\n",
       "      <td>43</td>\n",
       "      <td>30</td>\n",
       "      <td>...</td>\n",
       "      <td>598.166139</td>\n",
       "      <td>88.708522</td>\n",
       "      <td>2.062989</td>\n",
       "      <td>25.464529</td>\n",
       "      <td>21.942236</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>7421</td>\n",
       "      <td>70</td>\n",
       "      <td>2.526</td>\n",
       "      <td>236</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1974 rows × 729 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      nAcid   ALogP    ALogp2       AMR       apol  naAromAtom  nAromBond  \\\n",
       "0         0 -0.2860  0.081796  126.1188  74.170169          12         12   \n",
       "1         0 -0.8620  0.743044  131.9420  80.357341          12         12   \n",
       "2         0  0.7296  0.532316  139.9304  74.064997          18         18   \n",
       "3         0 -0.3184  0.101379  133.4822  80.357341          12         12   \n",
       "4         0  1.3551  1.836296  143.1903  76.356583          18         18   \n",
       "...     ...     ...       ...       ...        ...         ...        ...   \n",
       "1969      0  1.8193  3.309852  177.6817  89.159790          24         24   \n",
       "1970      0  1.6903  2.857114  167.6057  82.972618          24         24   \n",
       "1971      0  1.6903  2.857114  167.6057  82.972618          24         24   \n",
       "1972      0  1.3365  1.786232  125.5605  63.287860          18         18   \n",
       "1973      0  1.8193  3.309852  177.6817  89.159790          24         24   \n",
       "\n",
       "      nAtom  nHeavyAtom  nH  ...          MW     WTPT-1    WTPT-2     WTPT-3  \\\n",
       "0        64          31  33  ...  439.218115  64.771680  2.089409  15.471445   \n",
       "1        70          33  37  ...  467.249415  68.960024  2.089698  15.486947   \n",
       "2        62          33  29  ...  463.181729  68.748923  2.083301  18.011114   \n",
       "3        70          33  37  ...  467.249415  68.883696  2.087385  15.468365   \n",
       "4        64          33  31  ...  461.202465  68.883696  2.087385  15.468365   \n",
       "...     ...         ...  ..  ...         ...        ...       ...        ...   \n",
       "1969     73          43  30  ...  598.166139  88.709996  2.063023  25.470481   \n",
       "1970     67          41  26  ...  570.134839  84.662088  2.064929  24.928962   \n",
       "1971     67          41  26  ...  570.134839  84.660642  2.064894  24.923083   \n",
       "1972     51          31  20  ...  436.098059  64.171346  2.070043  19.841924   \n",
       "1973     73          43  30  ...  598.166139  88.708522  2.062989  25.464529   \n",
       "\n",
       "         WTPT-4    WTPT-5  WPATH  WPOL  XLogP  Zagreb  \n",
       "0      8.858910  3.406628   3011    47  4.666     166  \n",
       "1      8.863774  3.406648   3516    54  5.804     174  \n",
       "2     11.390412  3.406644   3542    52  2.964     176  \n",
       "3      8.857943  3.406624   3594    50  6.015     174  \n",
       "4      8.857943  3.406624   3594    50  4.462     174  \n",
       "...         ...       ...    ...   ...    ...     ...  \n",
       "1969  21.946991  0.000000   7121    70  2.526     236  \n",
       "1970  21.405589  0.000000   6171    66  1.884     228  \n",
       "1971  21.400883  0.000000   6421    66  1.884     228  \n",
       "1972  16.326873  0.000000   2583    50  0.782     174  \n",
       "1973  21.942236  0.000000   7421    70  2.526     236  \n",
       "\n",
       "[1974 rows x 729 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataFeature = data_train[data_train.columns[5:].tolist()]\n",
    "dataFeature"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "b4a925ab",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Caco-2</th>\n",
       "      <th>CYP3A4</th>\n",
       "      <th>hERG</th>\n",
       "      <th>HOB</th>\n",
       "      <th>MN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1969</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1970</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1971</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1972</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1973</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1974 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      Caco-2  CYP3A4  hERG  HOB  MN\n",
       "0          0       1     1    0   0\n",
       "1          0       1     1    0   0\n",
       "2          0       1     1    0   1\n",
       "3          0       1     1    0   0\n",
       "4          0       1     1    0   0\n",
       "...      ...     ...   ...  ...  ..\n",
       "1969       0       1     1    0   1\n",
       "1970       0       1     1    0   1\n",
       "1971       0       1     0    0   1\n",
       "1972       0       1     0    0   1\n",
       "1973       0       1     1    0   1\n",
       "\n",
       "[1974 rows x 5 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataTarget = data_train[data_train.columns[:5].tolist()]\n",
    "dataTarget"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "eb6be11b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0    1215\n",
       " 1     759\n",
       " Name: Caco-2, dtype: int64,\n",
       " 1    1461\n",
       " 0     513\n",
       " Name: CYP3A4, dtype: int64,\n",
       " 1    1099\n",
       " 0     875\n",
       " Name: hERG, dtype: int64,\n",
       " 0    1465\n",
       " 1     509\n",
       " Name: HOB, dtype: int64,\n",
       " 1    1514\n",
       " 0     460\n",
       " Name: MN, dtype: int64]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "temp_count = []\n",
    "for i in dataTarget.columns:\n",
    "    temp_count.append(dataTarget[i].value_counts())\n",
    "temp_count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "136fbe7a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEICAYAAACzliQjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAZWklEQVR4nO3dfZRdVX3/8fdHAgQEDQkD4kxkokRYBBHD8FABi6I8REpwFSUpQpCwUiW0/ESrPLRNI8tWWpWWH0h/QVICxUTqwyILYyAF8wOX5WGCBBIeZAAxMwUZAqYCRkj49o+zA4fJZGbuw9yZ6/681po15+yz7zl7J3c+d999zj1XEYGZmeXhLSPdADMzaxyHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZcehbU5M0XtIPJb0k6SlJfzZI/UskPShpk6S/a1Azh0zS30n695Fuh/3hcuhbs7sSeAXYEzgNuErSlAHqdwFfAn7UgLaZjToOfWtakt4K/CnwNxHxYkT8FFgKnL6tx0TEooj4MfDbIex/O0kXSXpc0m8lrZI0MW37oKR7JW1Ivz9YetwvJX20tP766F1Su6SQNEvSryQ9J+nitO144CLgVEkvSlqdys+U9ERqw5OSTqvin8sMgDEj3QCzGrwX2BQRvyiVrQb+uE77Px+YCUwDfgEcCLwsaTzFO4W/BBYDnwR+JGmfiFg/xH0fCexL0Yd7JP0gIpZL+ntgn4j4NLz+wnY5cEhEPCppL2B8nfpnGfJI35rZLsD/9CnbAOxap/2fDfx1RDwahdUp1D8OPBYR10fEpohYDDwC/EkF+54fEb+LiNUUL1TvH6Dua8ABknaKiKcjYm21HTJz6FszexF4W5+yt5GmbiStTdMkL0o6qor9TwQe76f8ncBTfcqeAlor2PczpeWXKV7AthIRLwGnAp8Fnpb0I0n7VXAcszdx6Fsz+wUwRtLkUtn7gbUAETElInZJP3dWsf91wHv6Kf9vYO8+Ze8CetLyS8DOpW3vqOCYW932NiJuiYiPAXtRvKO4uoL9mb2JQ9+aVhoF/wD4iqS3SjoCmA5cv63HSNpe0liK5/4YSWMlbbeN6t8GLpE0WYUDJU0AlgHvlfRnksZIOhXYH7g5Pe5+YEY6VgdwSgXd+jXQLuktqb17Spqe5vZ/T/Hu5rUK9mf2Jg59a3bnADsBz1KcVP3cIHPeVwO/ozhBe3Fa3tbVPt8EbgRupTh3cA2wU5rXPxH4ArCe4hLQEyPiufS4v6F4h/ACMB/4TgX9+Y/0e72k+yj+Rs+neHfxPMVJ6s9VsD+zN5G/RMXMLB8e6ZuZZcShb2aWEYe+mVlGHPpmZhkZ1bdh2H333aO9vX2km2Fm1lRWrVr1XES09LdtVId+e3s7nZ2dI90MM7OmIqnvJ8Zf5+kdM7OMOPTNzDLi0Dczy8iontPvz6uvvkp3dzcbN24c6aYMaOzYsbS1tbH99tuPdFPMzF7XdKHf3d3NrrvuSnt7O5JGujn9igjWr19Pd3c3kyZNGunmmJm9rummdzZu3MiECRNGbeADSGLChAmj/t2ImeWn6UIfGNWBv0UztNHM8tOUoW9mZtVpujn9vlaurO+I+uijh3ar6eXLl3PeeeexefNmzj77bC644IK6tsPMbDg0feiPhM2bNzN37lxWrFhBW1sbhxxyCCeddBL777//SDfNzBpA84d/+jbmDc93nXh6pwr33HMP++yzD+9+97vZYYcdmDFjBjfddNNIN8vMbFAO/Sr09PQwceLE19fb2tro6ekZ4BFmZqODQ9/MLCMO/Sq0traybt2619e7u7tpbW0dwRaZmQ2NQ78KhxxyCI899hhPPvkkr7zyCkuWLOGkk04a6WaZmQ2q6a/eGeollvU0ZswYrrjiCo477jg2b97MWWedxZQpUxreDrP+1Psy5v6MxN+d1UfTh/5ImTZtGtOmTRvpZoyoRly2BsN36ZpZjjy9
Y2aWEYe+mVlGHPpmZhlx6JuZZWTQ0Je0UNKzktb0s+0LkkLS7mldki6X1CXpAUlTS3VnSXos/cyqbzfMzGwohjLSvxY4vm+hpInAscCvSsUnAJPTzxzgqlR3PDAPOAw4FJgnabdaGm5mZpUb9JLNiLhDUns/my4DvgSU7zQ2HbguIgK4S9I4SXsBRwMrIuJ5AEkrKF5IFtfW/PpfNjjUywPPOussbr75ZvbYYw/WrNnqTZCZ2ahU1Zy+pOlAT0Ss7rOpFVhXWu9OZdsq72/fcyR1Surs7e2tpnkNceaZZ7J8+fKRboaZWUUqDn1JOwMXAX9b/+ZARCyIiI6I6GhpaRmOQ9TFhz70IcaPHz/SzTAzq0g1I/33AJOA1ZJ+CbQB90l6B9ADTCzVbUtl2yo3M7MGqjj0I+LBiNgjItojop1iqmZqRDwDLAXOSFfxHA5siIingVuAYyXtlk7gHpvKzMysgYZyyeZi4L+AfSV1S5o9QPVlwBNAF3A1cA5AOoF7CXBv+vnKlpO6ZmbWOEO5emfmINvbS8sBzN1GvYXAwgrbZ2ZmddT0d9kcqTswzpw5k5UrV/Lcc8/R1tbG/PnzmT17oDdBZmYjr+lDf6QsXlzzRwzMzBrO994xM8uIQ9/MLCNNGfrF+eLRrRnaaGb5abrQHzt2LOvXrx/VoRoRrF+/nrFjx450U8zM3qTpTuS2tbXR3d3NaL4vDxQvTm1tbSPdDDOzN2m60N9+++2ZNGnSSDfDzKwpNd30jpmZVc+hb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGhvLF6AslPStpTansnyQ9IukBST+UNK607UJJXZIelXRcqfz4VNYl6YK698TMzAY1lBuuXQtcAVxXKlsBXBgRmyRdClwIfFnS/sAMYArwTuA/Jb03PeZK4GNAN3CvpKUR8VB9utG/lSs1nLsH4OijR+8tns3M+hp0pB8RdwDP9ym7NSI2pdW7gC33EJ4OLImI30fEk0AXcGj66YqIJyLiFWBJqmtmZg1Ujzn9s4Afp+VWYF1pW3cq21b5ViTNkdQpqXO03zPfzKzZ1BT6ki4GNgE31Kc5EBELIqIjIjpaWlrqtVszM6OGL1GRdCZwInBMvPHdhT3AxFK1tlTGAOVmZtYgVY30JR0PfAk4KSJeLm1aCsyQtKOkScBk4B7gXmCypEmSdqA42bu0tqabmVmlBh3pS1oMHA3sLqkbmEdxtc6OwApJAHdFxGcjYq2kG4GHKKZ95kbE5rSfc4FbgO2AhRGxdhj6Y2ZmAxg09CNiZj/F1wxQ/6vAV/spXwYsq6h1ZmZWV/5ErplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWWk6tswmOVO84f/1t0xz7futvrySN/MLCMOfTOzjDj0zcwy4tA3M8uIQ9/MLCMOfTOzjDj0zcwy4tA3M8uIQ9/MLCMOfTOzjAwa+pIWSnpW0ppS2XhJKyQ9ln7vlsol6XJJXZIekDS19JhZqf5jkmYNT3fMzGwgQxnpXwsc36fsAuC2iJgM3JbWAU4AJqefOcBVULxIUHyh+mHAocC8LS8UZmbWOIOGfkTcATzfp3g6sCgtLwJOLpVfF4W7gHGS9gKOA1ZExPMR8QKwgq1fSMzMbJhVO6e/Z0Q8nZafAfZMy63AulK97lS2rfKtSJojqVNSZ29vb5XNMzOz/tR8IjciAqjb/V8jYkFEdERER0tLS712a2ZmVB/6v07TNqTfz6byHmBiqV5bKttWuZmZNVC1ob8U2HIFzizgplL5GekqnsOBDWka6BbgWEm7pRO4x6YyMzNroEG/OUvSYuBoYHdJ3RRX4XwNuFHSbOAp4FOp+jJgGtAFvAx8BiAinpd0CXBvqveViOh7ctjMzIbZoKEfETO3semYfuoGMHcb+1kILKyodWZmVlf+RK6ZWUYc+mZmGXHom5llxKFvZpYRh76Z
WUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llZNB775g1m5UrNdJNMBu1PNI3M8uIQ9/MLCMOfTOzjDj0zcwy4tA3M8uIQ9/MLCM1hb6kz0taK2mNpMWSxkqaJOluSV2Svitph1R3x7Telba316UHZmY2ZFWHvqRW4C+Bjog4ANgOmAFcClwWEfsALwCz00NmAy+k8stSPTMza6Bap3fGADtJGgPsDDwNfAT4Xtq+CDg5LU9P66Ttx0jyp2jMzBqo6tCPiB7g68CvKMJ+A7AK+E1EbErVuoHWtNwKrEuP3ZTqT+i7X0lzJHVK6uzt7a22eWZm1o9apnd2oxi9TwLeCbwVOL7WBkXEgojoiIiOlpaWWndnZmYltUzvfBR4MiJ6I+JV4AfAEcC4NN0D0Ab0pOUeYCJA2v52YH0NxzczswrVEvq/Ag6XtHOamz8GeAj4CXBKqjMLuCktL03rpO23R0TUcHwzM6tQLXP6d1OckL0PeDDtawHwZeB8SV0Uc/bXpIdcA0xI5ecDF9TQbjMzq0JNt1aOiHnAvD7FTwCH9lN3I/DJWo5nZma18f30a6T5w3/VaczzLJiZ1Ydvw2BmlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRmoKfUnjJH1P0iOSHpb0R5LGS1oh6bH0e7dUV5Iul9Ql6QFJU+vTBTMzG6paR/r/AiyPiP2A9wMPAxcAt0XEZOC2tA5wAjA5/cwBrqrx2GZmVqGqQ1/S24EPAdcARMQrEfEbYDqwKFVbBJyclqcD10XhLmCcpL2qPb6ZmVWulpH+JKAX+DdJP5f0bUlvBfaMiKdTnWeAPdNyK7Cu9PjuVPYmkuZI6pTU2dvbW0PzzMysr1pCfwwwFbgqIj4AvMQbUzkAREQAUclOI2JBRHREREdLS0sNzTMzs75qCf1uoDsi7k7r36N4Efj1lmmb9PvZtL0HmFh6fFsqMzOzBqk69CPiGWCdpH1T0THAQ8BSYFYqmwXclJaXAmekq3gOBzaUpoHMzKwBxtT4+L8AbpC0A/AE8BmKF5IbJc0GngI+leouA6YBXcDLqa6ZmTVQTaEfEfcDHf1sOqafugHMreV4ZmZWG38i18wsIw59M7OMOPTNzDLi0Dczy4hD38wsIw59M7OM1Hqdvo1SK1dqpJtgf8A0vzHPr5hX0V1cbAg80jczy4hD38wsIw59M7OMOPTNzDLi0Dczy4hD38wsI75k08z+YPhS5cF5pG9mlhGHvplZRhz6ZmYZceibmWWk5tCXtJ2kn0u6Oa1PknS3pC5J303fn4ukHdN6V9reXuuxzcysMvUY6Z8HPFxavxS4LCL2AV4AZqfy2cALqfyyVM/MzBqoptCX1AZ8HPh2WhfwEeB7qcoi4OS0PD2tk7Yfk+qbmVmD1DrS/2fgS8BraX0C8JuI2JTWu4HWtNwKrANI2zek+m8iaY6kTkmdvb29NTbPzMzKqg59SScCz0bEqjq2h4hYEBEdEdHR0tJSz12bmWWvlk/kHgGcJGkaMBZ4G/AvwDhJY9Jovg3oSfV7gIlAt6QxwNuB9TUc38zMKlT1SD8iLoyItohoB2YAt0fEacBPgFNStVnATWl5aVonbb89Ivy1OGZmDTQc1+l/GThfUhfFnP01qfwaYEIqPx+4YBiObWZmA6jLDdciYiWwMi0/ARzaT52NwCfrcTwzM6uOP5FrZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUaqDn1JEyX9RNJDktZKOi+Vj5e0QtJj6fduqVySLpfUJekBSVPr1QkzMxuaWkb6m4AvRMT+wOHAXEn7U3zh+W0R
MRm4jTe+AP0EYHL6mQNcVcOxzcysClWHfkQ8HRH3peXfAg8DrcB0YFGqtgg4OS1PB66Lwl3AOEl7VXt8MzOrXF3m9CW1Ax8A7gb2jIin06ZngD3TciuwrvSw7lTWd19zJHVK6uzt7a1H88zMLKk59CXtAnwf+D8R8T/lbRERQFSyv4hYEBEdEdHR0tJSa/PMzKykptCXtD1F4N8QET9Ixb/eMm2Tfj+bynuAiaWHt6UyMzNrkFqu3hFwDfBwRHyztGkpMCstzwJuKpWfka7iORzYUJoGMjOzBhhTw2OPAE4HHpR0fyq7CPgacKOk2cBTwKfStmXANKALeBn4TA3HNjOzKlQd+hHxU0Db2HxMP/UDmFvt8czMrHb+RK6ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llpOGhL+l4SY9K6pJ0QaOPb2aWs4aGvqTtgCuBE4D9gZmS9m9kG8zMctbokf6hQFdEPBERrwBLgOkNboOZWbYUEY07mHQKcHxEnJ3WTwcOi4hzS3XmAHPS6r7Aow1rYOPsDjw30o0YITn3HfLuv/veOHtHREt/G8Y0sBFDEhELgAUj3Y7hJKkzIjpGuh0jIee+Q979d99HR98bPb3TA0wsrbelMjMza4BGh/69wGRJkyTtAMwAlja4DWZm2Wro9E5EbJJ0LnALsB2wMCLWNrINo8Qf9PTVIHLuO+Tdf/d9FGjoiVwzMxtZ/kSumVlGHPpmZhlx6FdB0jskLZH0uKRVkpZJeu8wHeuGdNuKNZIWStp+OI4zxLb01+//lvSOUp0rJV0o6WhJGyTdL+lhSfPS9kNT2f2SVkv6RJ9jnCwpJO3Xz/HfJqlb0hXD39vBSWqXtKaf8mslPVnq589S+ZmSelPZI5I+3+dxn5b0gKS16d/m25LGNag7VZP0Yp/1M8v/R5LmpP4+IukeSUeWtq1Mz+8tz5M5NKn0vP330vqY9P99c1o/U9Jrkg4s1Vkjqb2R7XToV0iSgB8CKyPiPRFxMHAhsOcwHfIGYD/gfcBOwNnDdJwBDdDvq4CvpzpTgaO2rAN3RsRBQAfw6bR9DdCRyo8H/p+k8gUFM4Gfpt99XQLcUeeuDZe/ioiD0s8HS+XfTX0/ArhY0kQo7kkFfB44ISKmAFOBnzF8z6uGkHQi8OfAkRGxH/BZ4DvlgQJwWunf5NJ0ZV8zegk4QNJOaf1jbH1JejdwcUNb1YdDv3IfBl6NiH/dUhARq4GfS7pN0n2SHpT0+u0lJJ2RRnCrJV2fytol3Z7Kb5P0rv4OFhHLIgHuofhsw0jYVr+/CrxH0ocp7qt0bkS8Wn5gRLwErAL2iYiXI2JT2jQWeP1KAkm7AEcCsyku56W07WCKALy13h2r0XaSrk6j81tLf/ADioj1QBewVyq6GPhiRPSk7ZsjYmFENPsn0r9M8QL4HEBE3AcsAub2U3cXiuDc3Ljm1d0y4ONpeSawuM/2m4EpkvZtaKtKHPqVO4AiwPraCHwiIqZSBOQ3VJgC/DXwkYh4P3Beqv9/gUURcSDFaP7ygQ6apnVOB5bXpxsV67ffEfEa8Dng+8CjEbHVSFzSBOBwYG1aP0zSWuBB4LOlF4HpwPKI+AWwPgU9kt4CfAP4Yt17VbvJwJVpdP4b4E9T+T+Vpndu6Pug9CI/FnggFU0B7mtAe4fDTqW+3g98pbRtCls/bzpT+RY3SHqA4pYrl0REM4f+EmCGpLHAgcDdfba/BvwjcFGjG7aFQ79+BPx9evL+J9BKMTL9CPAfpZHO86n+HwHfScvXU4xwB/It4I6IuLPeDa9VRNxPMW3zrT6bjpL0c4rR+de2fCYjIu5OIXkIcGH6A4FiZLQkLS/hjSmec4BlEdE9fL2o2pOp/1CEW3taLk/vnFaqf2p6jnQB34qIjX13
KOl9KUAfl3TqcDa+Tn5X6utBwN9W+PjT0uDnXcAXJe1d9xY2SEQ8QPEcmEkx6u/Pd4DDJU1qVLvKRt29d5rAWuCUfspPA1qAgyPiVUm/pBjJVUTSLRQvFp2lG9PNS/v+82obXQfb6vcWr6Wfsjsj4sRtPSAiHk4nAQ+Q9ATFC+T7JAXFh/dC0l9RvEAeJekciimAHSS9GBGj4fsYfl9a3kxx3mUg342IcyV1ALdKWhoRz1D8+04FfhIRDwIHpZOhQ5ouGsUeAg4Gbi+VHUx611cWEb2S7gMOA55qTPOGxVKK81pHAxP6bkwfUv0GxdRXw3mkX7nbgR3LVxmks/F7A8+mwP9wWt9S/5NpigNJ41P5z3hj3vo04E6AiDgujZi2BP7ZwHHAzDSVMlL67bekoyrZiYpbcIxJy3tTnKT+JcULyvURsXdEtEfEROBJ4KiIOC0i3hUR7RRTPNeNksCvWkR0UrzD2zLd9w/A1yWVz9k0e+BDMZVxaen5fxBwJlu/K0TSzsAHgMcb2L7hsBCYn168t+Va4KMUg7mGcuhXKJ1Q/QTw0fT2ey3FH+wyoEPSg8AZwCOp/lqKk53/X9Jq4JtpV38BfCa91T+dN/74+/pXipH/f6W3/JW+da6LAfr9TIW7OhJYneZ+fwick6a+Zqb1su/T/1U8zaA8p3//Nq5IuZTiObBrRCyjOK/zY0kPqbjMczPFLUuaVkQspQjBn0l6BLga+HREPF2qdkN6PqwCro2I/s6ZNY2I6I6IAc/Rpe8TuRzYozGteoNvw2BmlhGP9M3MMuLQNzPLiEPfzCwjDn0zs4w49M3MMuLQNzPLiEPfzCwj/wvpbHE3n0h+EAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "name_list = dataTarget.columns \n",
    "count_0 = [i[0] for i in temp_count]\n",
    "count_1 = [i[1] for i in temp_count]\n",
    "\n",
    "x =list(range(len(count_0)))  \n",
    "\n",
    "total_width, n = 0.8, 2  \n",
    "width = total_width / n  \n",
    "  \n",
    "plt.bar(x, count_0, width=width, label='0',fc = 'y')  \n",
    "\n",
    "for i in range(len(x)):  \n",
    "    x[i] = x[i] + width  \n",
    "plt.bar(x, count_1, width=width, label='1',tick_label = name_list,fc = 'g')  \n",
    "plt.legend()  \n",
    "plt.title('0-1 counts')\n",
    "plt.show() "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2e6316e1",
   "metadata": {},
   "source": [
    "#### 数据分布适合，暂不做均衡性考虑"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2aacc3e7",
   "metadata": {},
   "source": [
    "#### 相关性矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "3d2ec524",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZ8AAAF0CAYAAAD8YTTVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAxnklEQVR4nO3deZhcRbnH8e8vIRBZQoCA7KsGECWAubggCooYtwviFWTfgyBeQRBQFhEVCSjIZdMosgYFL+CCQgwCCipqQEAWBcKFILIFyAaSQPLeP6o6dJqZyUxPzzl9pn+f5znPpM+p7q7DMP12Vb1VpYjAzMysSEPKroCZmXUeBx8zMyucg4+ZmRXOwcfMzArn4GNmZoVz8DEzs8I5+JiZWeEcfMzMrHAOPmZmVjgHHyuNpCH5p8qui5kVy8HHSiHpP4CTJa0deY0nByGzzrFU2RWwziPpDcBlwBuAFSV9PyLujYiQtFREvFpyFc1sgDn4WBkE/IsUfLYBVpN0YUTc6MBj1hnc7VYRg6lLKiJeAq4FzgW+BGwEfE7SrpIelLTlYLpfM3s9B582JmkVSetKWisG394XM4ADI2IK8N/A0sD5wPLAk7kLzgHIbJBy8GlTkvYFrgJ+D9wmae+Sq9QyOaj8EVhe0soRcTswCxgOPAMcKGmNQRhwzSzzmE8bkjQOOA84CngJ2BQ4SdLfIuKuMuvWCjmoPCrpaWD1HGg/DHwU+DjwGeBl4Nvl1dLMBpKDT5uRtCpwOnBiRHwvnxsN7AFsBtxVXu36R9KQiFgoaWhELACmAT8D1gP2iYjfAr+V9BgpG87MBil3u7WfMcDzwC21ExHxIHA78HYASUNLqVkfSVpa0smSPiRpo4hYCJADD8BFwHzgmIj4cW3SaUScExEzy6m1mRXBwaf9PAhcFBF/hcUCzVPA6pA+vCVtlrvn2tk5wEnADqQWzemSdpO0Qr7+N2An4AKAWnAys8HPwafNRMR04HJY1E1VayW8AKyWz68K/AnYsZRK9t7VwP+Rkgv2At4C7A1Mk/QV4JMR8XBEzCuxjmZWAjmhqD1JUi3dOP/cE/jviHiHpD8DT0fEx8uuZ3dyRtswYALwbEScKml4RLycx3ReIX35eRW4DZgQEf8or8b9U/s9lV0Ps6pwy6fN1LrZah9kdR9oTwNDJP0KWLmdAw+kekfEfOBW4MuSxuXAcwKwHLA/8F7g17l8ZQJP3YKoQyWNhMV+T2bWC275tJH6b8+SLgV+ERE/yY+3A24CngU2iYgXyqpnb0gaFhGv5H+fSgo4DwJnAbtGxE/rytay39pera6S1gMOATYGJkbE5Hx9iMeuzJbMLZ82JOmLwMdIH9Y1fyWlH+/QjoFH0lKS1pf0nlrgkVRL5b+RdD/nAAfXAk9ddltVAs+QHHiWB35OWo3h8rrAs0JOJfffldkSuOVTku6+IUt6B2nuy/4RcX3DtZHtmoIs6QrSPKRRwBPAThHxZN31LwJfAD4UEfeUU8vWkHQ78E9gj9y1iKQLSF2J20fEH/M5jwOZdcPf0EqQtw1YKGmEpA9K+rykrSWtTcoO270+8NTWOGvjwHMV8CZScDmM1CJ4e0Oxa0ktuY/n51Ry3TZJnyctA3RUXeA5CtiNNL51q6T9wONA1j4kfVrSrZJmS1riyvGSxkr6s6SXJE2TtFer6+QVDgqWWzy1X/5NpAU11wbmAHcDp0fEzfXPaecPMUmHA1sAH4yIx/K5vYCtJL0deA44PyIeljQF+JqkKyPi4dIq3YS6lurGpPlJT+TzqwPvA/YE/gIcDvxQ0uiI+HJZ9e0vt9oGnRdIC/e+AZjYU0FJKwLXA98CtiUlBl0raVqtVd8KDj4Fq3W1SZoEzAX2jojHJe0KfJq0u+eJrfwlDxRJSwPrAj8kdUMh6c3AJ4A1SRl6HwY+RGrxnAUMr1rggcUmwG4K/F9EvJpbsE9JOiwiavf/HdIqFW8uqapNkfRG
YARpxYnpDjyDS9245Ha9KL4LaU3J0/P/B1MkXQuMJ83ZawkHn4Ll7qbVgNHA2RHxOEBEXCXpX8APgHG08Jc8UCJivqRTgFF5IH4p4FLg+8DREfGipHcDv8mp1jdIOrHUSvff/cDOklaPiKfyuX/VLkbETEmzycG4CiRtCZwNbELq9v2+pB+SM+ZLrVwH+ND2y8Vzzzefc3PHPfPuIy3EWzMxInps3SzBGOCvDb/7O0kTxFvGwadgecLoc8AC8rfj/A361Yi4TdJPSR9up0XEv8usa3fyXKThpKDzGDA3nxsCnAn8tJZmDbxI+kB7Adq7C7FeY7dT3eNrSEsCnSPphIj4R33iiKRDgI8A7yy80k2QtAowBbgE+BopffzwiPhBqRXrIDOeX8CfJq/d9POHrTHt5YgY28IqrUDa4qTeTFLLuGWccFCA2sRRSSvkD7FXSanTh0h6Syy+dfRM0lyedv7dnE1aleA3kr4HKV06IuZHxE/qAg/AWqRA23bp4d1pmG+1kqSVSMEW0jjdj4D3kwLQx3K5LXIywpnAIRExrYSqN+O3wOSIOCrSxn5HAvMkHSdpoqTP1ApWNUnE+mwOsGLDuZHA7Fa+iVs+A6zWqpG0Dmmg70LgfyPiUEkbArdIOhKYTvp9HA2cGhEvllfr7km6DliVlA6+DvDJPMt/71r2Vy43jNSN80PgzEgrc7e9hsDzDWAbUibflZIuyWniX5T0DHAgcI2kfwKrkBIRvhAR15RU/T6RdDnpd1TfSjsE2Jy0qd8w4CBJm0bE56vSaq2eYEF7zUu+G9i54dyW+XzLOPgMoForJ086vBn4A/B4XZE9gBOB75IG+J4jTVo8s/DK9oKkM0lzef4jImbkhIMXgU8CK5NW3kbSMqSkgy8BP4uI00uqcp/VBZ5vkwZeTydl832OtB7dPbncGTl7byPSgqn3A3dXLJniZuC/SJN/95e0C2lL809FxHWSlgO+Duwt6cxaNqO1VgALGdi4nntfhpGya5FUa8nP6+JLxbXA6Xlu3tmkjLddgA+2sk4OPgOo7pd6Jinza9+6DzdFxHPAEZLOB5YFXo6Iv5dT255Jej9wBHB8RMyARQkHPwY+RfrmXxuAX4YUlL4XEeeXUN2m6LXN7g4gzVd6e0Tcn6+9DdhT0oTIK0xE2lX2LtLq3ZVRa91FxIWSHgGuknQ3aVO/L0TEdQA5YeRPpGzFynS5SdoZGBZ5aaoqWMiAt3z2Ju2fVVMbT94g98pcD7wlIqbnpJmPkHZTPgV4EvhMqzNwHXyKMRL4fU42GJrHR2pBaK1275LK35qeBm4APixpXl3r7ATgvoi4r1Y+ImZL+mVUbI2zHHg2I2UcnlIXeFYifTAPA47Jj58gJR8sFREt7Y4YaPXfdCPiZknvInUHvwW4t6H4e0hdcNOLq2G/fRg4WNKapDlmryzpCYNdRFwMXNzN5UdJE8Pry/8F2Hog6+TgU4yVgfVh0UZwtW+Rq5BaPlflX3bbydlQ50bE7pK+RJpEubukN5DuazRpEtpiC4RWKfA0ZLY9TOrbPlDSPaQ13G4A7gOuAx4h7U30JuCrwB2SdoiIxuygtqO0Jt3WpG6UZUm75T4cEQ/lxIkfAb+W9JmIuELSYcBBwGZV+n1GxCGS/k7qcdgoz5tr299PECzowOE0B58B0JimC/wEOCFnDl0VEc/nclsB+5D+6NvVB4GPSnpDRNyd5+kcCexKGv/5dET8KwfUynxA1TQkGIzJrZgtJV0GTCIFo5eAT9SSQHKrLiRtS9qrqG0/2Br8nDTDfS3S+NUXgd9LOi8ifgx8TGmS7IU5GO1CGv+pSuZe/bSFs5T2jboU2DAH1LadezXQYz7tqJ3TeStHr215vSi1Oj++hpRafSRwhqRDJR0P/Bg4LSLuLLyyvXcdqYvpPQCRJlaeRMpiuwPYQ9LWeQyhUn9BDYHnBmAfvbaf0t7AV0gB9kHq5jjUnhMRt7brGF2jfH9DgH1JLZmNgANI
re+vStodICKOAI4irbbx5Yj4RTk17htJ20lajdQ1CkDOOnwfaUmkX0vaoqTq9SiABUTTR1V5VesW0Wv7vCxDyhDZkDRZ69yImJTLnAy8i5TaegtwW0ScUU6Nl0xpxQKR5oLcHxEHafF9evYmDWQOBb6fvz1XjqTvAh8lfSjPbrj2CeAq4FfAl0n/HSr1RyNpH+A44ANRt9J4vrYNadv2fwI7Rp7YLGmDiPi/wivbhPx3dRIpW/RW0v+PPyeNZfyF1Nq7jJSZeAhwc7TRNh5bjFk6ply/atPPX22tf90RrZ1kWgi3fFogf4Ou/c/8G9L/5DeSJiReJulbABFxMqkr403Af7Vz4AHI3RevkLou1sznFu3TExGXAWeQvm1uXlpFm6DXdiM9gjSGs2Mt8EjaSGm18aERcS0wlvSF4SLg/arIfj11Y4vrkVpvTzVei4jfAweTvhR9oHatQoFnadI2HtOBeaTJzy+S7umXpOkNvybd/wb539uWUllbjMd8WqCu6+YU0kDu+3Ma8k+AaaSVDDYmpVo/T/rjaEuS3kf6wx0GTCUts7Ei8EFJ74mI2+K1RTVfjYgpkqZHtbbBVs5s2440KL1dRDyQr40C/gx8Mbdkh+axrjeTMsGOjYjflFX3vqhroa0NrJHHqWq/t6jrdryLtLLGmg3Pa2uSNiX1Wh1F+jv7FPBu0l5Yc5QmcY8hzdPamJQ0skxE3FJKhbsR4IQD6xuliVrrkJr7M4GVSKsTzFeaHT8G2IHU73wxcJuknds1tTrfz7tJ4wDTSXXfBHiI1JWxr6QFEfHHHIBqaeOVCTywaH295UirbP+dxb8M/JqU3XZRLlvL3psNrKu0mkPbkzQCmJMDyf2klQpqCRWNmxkG8ABpWadKUFqF+3bgMdIXhbMkPU3aU2qypCMi4s+k7MRrJS2d/y6XLbHa3apcpk4LVKL7oB3lrpeLSR9SF+Q/5AuBO/M3sk8CR0SaFf4wqRXxPK9fM6kt1N3P9sALEXFIRHyI1FXxMWBHYDvSlg/j8jfotuk376ucuXYmac25r0jaQdLVpHWtDqu1DCC1BpW2vGjbDf26MAW4MX/YXk4KQJcrrTLemAq/HWmM8oGiK9kP7yKNqc4kzb06JCKuIC1PNRu4SGnFhsVExEuF1rIXoh/JBlVOOHDwad4U0mKTnyVlDRERd0XEI6SWwxDgT7msSBljH482nc/Da/dzLGnfDnJywULSEhw3Av9Jyvr6JqkVVMmWc23MJo9ZHU+aYPcDUgv1wIiYlYNrKE06/RVp0LpKTiJ9cfgt6UN6AqlL+BJJhygtmPoWSZ8kfWn6elUy97Kfk2bljyK1bg6S9BXSmM8XSNmlpyntMkvUrTto7cHBpwm5S20t4OCIuDvSMiT1/y2fJnXHnZ0zwq4CHoy8LEu7abifv0bE3Dwe8Aos6qYamsdFdiRtgrdsLL4ad9tTMjSP99SSJm4hJRzcRprJ/0lJq+VuxZVIgeeSiLiktIo3IdLmYR8ndSneQBrXOZKU1XYBqbvxz/nctyPignJq2ne5C20haZLv30iJBVNIyTznkv7+jgJ+QW6pl1XXXglY0I+jqir5zbUsuRtmFdKcl/+JiEV95PHaDqVDI80Y35/0B/A24LqIOLaMOvdkCfdTvwZd1I19zKGC2UJ1g+u1rsLTlTZRu48032pfUutgL2BVpa0iJpK+NBxWRp2bUZfyPyQi7lNaDPanpOC6D2lQfkNgK9KH9D1RkUmkklaMiFl1rZgHSa26nSNivzzmswfp93Y8aemnmyPihnJq3DtpYdHO4+DTB7kFsJAUUL4Frxu4XTRATUpb/TBp7a+nXvdibaCX9xO5lXA+aWHRf1clG6pG0qHAG5VWZp6ttBjqW0kD1mNJS85cEhFHS5oO7EcKRs9HxMZl1bsvJI0GZkbEM3WD62uQdpU9mdRt+FPg5Ig4hdTqqYycmfgVSTeSehLmRNrC/GDgZ5L+KyLOljQX2I30heLAyIuk
tjexoDrrtraMg0/fLQReJX1z/EV0seaVpDeRMt1GRvsvsb/E+yHNE9kcWKdqmW25dbcRKXNvGaVtEIaQlst5SNIGpBbqQUqr+55AyvQ7kpQ00vaUVtL4HmlH2eMj7TkEaa7ZdRFxitLKDf8HfEvSfwC7RpvulNtIaU26c0kLn76DtLfMqpJ+BUwmfYl4L2mfrAslPUvqgnuopCpbL3jMpw/yB9lLpP7zXSVtWZcRVf/V5d2kD/W5hVeyD/pwP9vkn1VZw2yR3GV4NGky7A6kFs1I8oTLSJMpTyJtjrcdaZ29P5Amnc4oocp9lrtCv0Nq3UyQ9E5JPyMthX9kLrMgIiaSls15O2nuTyVExFzSCg2/JKXC3w98G9idtGfUbsDhStsAEBE/Bw6NhtUq2lUAC6P5o6ocfPogf5DNJy3IuCZpE65tJa1QmzsiaYd8/ux27W6rGWz305NISxwdQepm24a0R03t2vOkfUsuB94MjI6IeSVUs2kR8TPS/S0AriTd46dyN+NSdeV+BWwSEZVqFeTus+Pzw+1JqxlsRfq9nQJclO+tVr4SrbqaBbnrrZmjqry2W5MkfYDU1bESaUfIB0gzqd8M/CQiTiyvdn032O6nO0p7vFxFWtX5Bzko1V8fHW06Cbg3lBbXPIu0IOrlEVEby1u03UWVKU0u/TawKXBFRHw7nx8WaemnxcYsq2CzzZeOH/9ytaafv/m6T3htt04SaYmVd5I+yDYgDVI/A3ytih/Ug+1+uhMR/wLGke7tMEmfz2MKteuVDTwAEfEMaQ+eXwO7STpL0kqDIfAARMTTpOWffkPaV+rsfH+1aQGVCjydzC2fFlDaWK1yTf3uDLb76UruivoGaZzndtIky8osL7MkOcHgMNIYzyzgkIh4vNxatU7D/c0kbfNcyfvbbPOl44rr3tj087dY75+VbPk42601Xq5a+vESDLb7eZ1IE2SPlfQ5Ukp8JQaneyu3dM6R9BRpbk8lkid6azDdX5p8Vt2xm2a55WMdT9Ky0YZrfrWKpOER8XLZ9RgoVb+/TTdfJi69bo2mn7/1eo+55WNWRYM58ABU+YO5NwbD/S2Mzmv5OOHAzMwK55aPmVmJOnXMxy2fEkkaX3YdWm0w3hP4vqqkevckFsSQpo+qqm7NB4eK/ZH0ymC8J/B9VUml7imtaj2k6aOqqltzMzOrLI/5NBi18tBYf51hhbzXumstxdgxwwvJdX/ovuWXXKgFhms5VlxqVCH3FAuKm8w+nGUZoZULua/5ayxXxNsAsNSKKzF8zXUG/L6WnlXcAgvDl16REcutWcjvas5LT86IiFX7+zqdOObj4NNg/XWG8efJ65RdjZb7yCbvLbsKLbdg9qCaF7rI4we/u+wqtNy611VuQfRemXLnVx/r72tEqNJjN81y8DEzK9nCDmz5dF64NTOz0rnlY2ZWojTPp/PaAQ4+Zmal8piPmZkVrDbPp9N03h2bmVnp3PIxMyvZgg5c1drBx8ysRIGccGBmZsVb6IQDMzMrUqemWnfeHZuZdRhJQyWdIelZSXMkXS1pVA/lj5Y0LZd9SNJhra6Tg4+ZWYkCsSCaP3rpOGAn4B3A2vncZV0VlPSfwFeBPSNiBWAf4AxJH+zfnS7OwcfMrGQF7OczHpgQEY9ExCzgGGCcpPW6KPsm4O6IuB0gIv4I3AOMacGtLuLgY2ZWogj6u5PpKElT647FNtOTNBJYF7jjtfeMacBsug4oPwZGSNpG0hBJ2wKjgRtaed9OODAzq7YZETG2h+sr5J+N+1rMBEZ0Uf4Z4H+Bm3mtgXJERNzbn0o2cvAxMyuVBnpLhTn554oN50eSWj+NTgR2B7YAHgDeAvxc0r8j4sJWVcrdbmZmJQr63e3W8+tHzASmA1vVzknakNTquaeLp7wduDYi7o/kPuCnwMf7f7evcfAxMyvZAoY0ffTSROBYSRtIGgFMACZHxKNdlP09sLOkNwNI2hTYmboxo1Zwt5uZ2eB3GrAS
8BdgGWAKsBeApD2B70XE8rnsGaQuuil5LtDzwE/ya7SMg4+ZWYkCsXCAFxaNiAXA0flovDYJmFT3+FXSvKDjBrJODj5mZiXrxOV1HHzMzEoUdObCov26Y0lvlHR2XgNonqQnJF0v6SOtqmATdVpf0oWSHpH07/zzm5LeUFadzMxscU23fCStT8qKmAN8CbibFMw+AHyXNKO2DJsAQ4FDgYeATUmZHquQlpgwM2sjYsHAzvNpS/1p+Zyff46NiKsi4h8R8UBEnAtsDiDpC5LukfRibhX9IC/1sIikd0q6KZeZlf+9Zr62jKTvSHpa0suSbpf0np4qFRE3RMR+ETE5r2P0S+AbwCf7ca9mZgOi1u3W7FFVTdVc0srAOOC8iJjbeD1PagJYCBwBbAbsAWwNnFP3OmNISzg8DGwDvBO4ktdaZKcDuwEHAFsCfwNukLRGH6s8Anihh/sZX1sX6dnnFvTxpc3M+mdBbv00c1RVs91ubwJEWnqhWxHxnbqHj0o6BviZpH0jYiFpZdW7IqK+O+wBAEnLkbrODsqtFyR9Bng/8FnghN5UNK/aejRwag/1nEjqmmPsmOHRm9c1M2uFCFW6BdOsZu+4V+FW0vslTZH0T0lzgGuApYHVc5EtgZu6efpGwDDSuBKwKFf9j6S1hpD0XUlza0cX7/9G0kqsU4CzenVnZmY24JoNPg+Ruio37a5AbnH8ktSS+RRpvaAD8uWlm3zfmlrr5CTS4ne1o/79Vyd16d0L7B0RbtGYWVsayLXd2lVTNY+I54HJwOGSlm+8npMKxpKCzJER8ceIeBBYs6HoX0ndaF2ZBswnjQXVXnco8C7g/lyPZyLi4dpRV24N4BZS4Ns9z9g1M2s7ASzMK1s3c1RVf8LmZ0ndb1MlfUrSxpI2kXQoaaXUh/LrH5EXs9udlHxQ7wxgS0kTJY3Jr3GQpHUj4kXgAmCCpI/kxe0uAN7Ia5l2r5Mz5X4LPJXfb5Sk1fMxtB/3a2Y2ANSRLZ+m5/lExCOStgK+TFohdS3gOdJ8n/ERcY+kzwPHAl8H/kAa+L+y7jXukrQDKRngdmAeMJXUXUd+LsBFpL0n/gqMi4gne6jajsCb8zG94doGwKNN3K6ZmbVQv5bXyUHgc/no6vr/AP/TcPqqhjK3Ae/t5vnzSK2XI/pQp4uBi3tb3sysTGmeT3W7z5rltd3MzErmhUXNzKxQRWyp0I46L9yamVnp3PIxMyvZwg5sBzj4mJmVKAIWdGC3m4OPmVnJPOZjZmZWALd8zMxKlLLdOq8d4OBjZlayKu/L0ywHHzOzEnmFAzMzK0Fndrt13h2bmVnp3PIxMytZlfflaZaDj5lZiTzJ1MzMSuExHzMzswK45WNmVqJO3VLBwcfMrGROODAeum95PrJJl7t6V9qv/v67sqvQcm+9fc+yqzAg1jh3XtlVaLnZo1couwoD487+v0SnTjL1mI+ZmRXOLR8zs5J1Yrabg4+ZWZnCCQdmZlawoDMTDjqvrWdmZqVzy8fMrGSd2O3mlo+ZWYlqqdbNHr0haaikMyQ9K2mOpKsljeqh/GqSLpH0nKTZku6StGar7hkcfMzMSjfQwQc4DtgJeAewdj53WVcFJQ0HfgPMBzYGRgJ7AnP7cYuv4243M7MSFbS8znjglIh4BEDSMcDDktaLiMcayu5LCjiHRcQr+dx9ra6QWz5mZtU2StLUumN8/UVJI4F1gTtq5yJiGjAbGNPF620PPARcnLvd/i7pyFZX2i0fM7OS9TPVekZEjO3hem1to1kN52cCI7ooP4oUgI4A9gc2B26Q9ExETOpPReu55WNmVqYY8DGfOfnnig3nR5JaP12VfyIizo6I+RExFbicNGbUMg4+ZmYlGuhst4iYCUwHtqqdk7QhqdVzTxdPuStXq6uqtoyDj5nZ4DcROFbSBpJGABOAyRHxaBdlLwZWkfTZnKI9hpTtdk0rK+TgY2ZWsgJSrU8DfgH8BXgCGArsBSBpT0mL0qhz
9ttHgINI3XL/C5wcEVe27o6dcGBmVqoiUq0jYgFwdD4ar00CJjWcuwXYciDr5OBjZlay8PI6ZmZmA88tHzOzknXilgoOPmZmJYrozFWtHXzMzErmMR8zM7MCuOVjZlaqQla1bju9bvlIeqOksyVNkzRP0hOSrs+zYF+WtFtDeUm6RdL1+fHFkiIfr0h6RNK3JC2Xr68qabKkf+XXf1zSeZIa1yOqvf7dkl6VNHoJ9f5Sfs9ze3uvZmZFilDTR1X1KvhIWh+4E/gQ8CXSKqc7AL8EjgWOB86TtHrd0z4PvA04oO7cjcAawIbACcBhwLfytYXAtcDHgdHAfsAHgO93UZ+tgdWAS4EDe6j3O0n7WHS1fpGZWemK2Mm0HfW22+38/HNsRNTvZveApMtJSzD8J2n9oP/MrZFTgf0i4sm68vMi4qn87yskbQ/sDBwaEc8B360r+5ik80nBrtGBwBXAT4GrJB0fEa/WF8gtpkmk4PeVXt6nmVmxImW8dZoltnwkrQyMA85rCDxAWjE1IhaSdr/bTtJBwCXAtRFx1RJe/t/AsG7ed01gF+C3DeeXAz5NWuL7tvwaH+viJSYC/xsRNy+hDmZmVrDedLu9CRDwQE+F8uqoRwDfA9YBPttT+dx1tgdpr/D68z+S9BJp8bs5pM2M6n0KeDwi/hoRQQpCBzW8xsG53if0VIe68uNruwDOX/hyb55iZtYyC1HTR1X1Jvj0+u4i4ofAk8C5eQ+JRuMkzZX0MvBH4HfA5xrKHEnad2In0tjQdxquHwRcVvf4svy6awJI2pjU5bdH3f7jS6r3xIgYGxFjlx4yvDdPMTNricAJB915iPTfZ9Nevuar+ejK74AtgI2B4RGxS0Q8U18gIp6KiL9HxM+BQ4DxktYBkLQJsA1was50e5XUIhvKay2kd5G2gb2vrsz7gMPy42V6eR9mZgVoPtmgygkHSww+EfE8MBk4XNLyjdcljezD+70UEQ9HxGO9bJXU6lcLGAcCfwLGkIJY7TgZOECSSEkIb2u4PhX4cf73/D7U18zMBkBvs90+C/wemCrpRFLqsoDtSdlo6/a3IpI+BqwC3AHMBTYDzgBuj4iHJQ0D9gG+ERH3Njz3OeAkYPuIuAmY2XD9ReD5xueZmbWDTsx261XwiYhHJG0FfJm0/epawHPA3aR5NK3wMvAZUvfeMsDjpHk/p+XrHwdWBa7uon5PSvo9aTzophbVx8ysEFUeu2lWr5fXyfN1PsfrEwQay63fzfn9lvC8G0mTULu7fg09dBNGxHt7uLZdT+9tZlaWiM4MPl5Y1MzMCueFRc3MSlblrLVmOfiYmZXMCQdmZlY4j/mYmZkVwC0fM7MSBdVeJqdZDj5mZiXrwCEfBx8zs1J16DwfBx8zs7J1YNPHCQdmZlY4t3zMzErmbjczMyucJ5mamVmhajuZdhqP+ZiZWeHc8jEzK1MAHdjycfAxMyuZx3zMzKx4HRh8POZjZmaFc8vHzKxUXljUgFiwkAWzZ5ddjZZ76+17ll2Flrv3nZPKrsKAeP93Dyq7Ci2nhR3Yr9QXHfifx91uZmZlyguLNnv0hqShks6Q9KykOZKuljSqF887VFJIOqHf99nAwcfMbPA7DtgJeAewdj53WU9PkLQecBTwt4GokIOPmVnZoh9H74wHJkTEIxExCzgGGJcDTHcuBI4Hnu/j3fSKg4+ZWenUj4NRkqbWHeMXe2VpJLAucEftXERMA2YDY7qsjXQI8GJEXNm6e1ycEw7MzMrWv4SDGRExtofrK+SfsxrOzwRGNBaWtC5wAvDOftVqCRx8zMzKNrDZbnPyzxUbzo8ktX4a/QD4ekQ8MZCVcrebmdkgFhEzgenAVrVzkjYktXru6eIpHwROlTRD0gxgG+BLkm5tZb3c8jEzK1MxC4tOBI6VdDPwHDABmBwRj3ZRdp2Gxz8BbgW+3coKOfiYmZWsgIVFTwNWAv4CLANMAfYCkLQn8L2IWD7VJf5Z/0RJ84DZ
EfF0Kyvk4GNmVrYBDj4RsQA4Oh+N1yYB3S4XEhHbDUSdPOZjZmaFc8vHzKxsXljUzMyKpg5cWNTBx8ysTH1bJmfQ8JiPmZkVzi0fM7NSyWM+ZmZWgg7sdnPwMTMrWwcGH4/5mJlZ4dzyMTMrWwe2fBx8zMzKVMzCom3HwcfMrGSdOMl0wMd8JN0i6dyBfh8zs8qKfhwVVXrCQQ5O0cXx47oy9efnSrpb0n5dvJYkHSDp95JmS3pR0v2SzpO0SaE3ZmZm3So9+GQXAWs0HIc0lDk4nx8DXAlcJOlDtYuSBFwGnAdMBj4EbArsD7wAfHVgb8HMzHqrqDGfIZJOBcYDC4FLgWMiYmG+/lJEPLWE15hZV+ZUSUcBO5ICDcCuwJ7AThHx87rnTQf+lIOTmVnb8ZjPwNkTeBV4N3A4cASwWzMvJGmopF2BlYFX6i7tAfyjIfAsEtH9XoGSxkuaKmnqK8xrplpmZs0LNX9UVFHB5/6IOCkiHoyIq4CbgQ/UXR+fx3Lqj8MaXuMySXOBeaRut+eAH9RdHw38o/4JkibUv2Z3lYuIiRExNiLGDmOZ/tynmZn1QlHB556Gx/8CVqt7fCWwRcPRuK3rF/P5DwJ3Af8dEQ8v4X3PyM85Dliub1U2MytAfzLdKtxdV9SYzysNj4PFA9+sXgSSp3KZhyV9CrhT0p0R8fd8/UFgsYy2iJgBzJC0pPEkM7PyVDiINKtdst36JAeha4DT607/CBgtaZdyamVm1hxF80dVtcsKB8tKWr3h3PyIeL6H55wJ3CVp64j4M6nrbmdgkqQJwA3AU8A6wF6kLDszM2sD7dLy2R94suHoMmutJiLuAW4Evp4fB7A78N+kOT5TSF1xl5KSE7YaoLqbmfWPx3xaLyK26+Lcfj1d76J8l/mEEbFjw+MAvp8PM7NqqHAQaVa7dLuZmXWkqo/dNKtdut3MzKyDuOVjZla2Cq9U0CwHHzOzsnVgt5uDj5lZyTpxzMfBx8ysbB0YfJxwYGZmhXPLx8ysTB2aau3gY2ZWNgcfMzMrXAcGH4/5mJlZ4dzyMTMrWSeO+bjlY2ZmhXPLx8ysbG75mJnZYCNpqKQzJD0raY6kqyWN6qbsRyTdJGmGpBck3Spp21bXycHHzKxM/dhCuw9jRccBOwHvANbO5y7rpuxKwDnAm4BVgSuA6yWt0/Q9dsHBx8ysbAO/k+l4YEJEPBIRs4BjgHGS1ntdVSImRcS1ETEzIl6NiAuAucB/NH+Dr+fgY2ZWtv4Fn1GSptYd4+tfWtJIYF3gjkVvFzENmA2MWVLVJL0NGAX8rT+32MgJB2Zm1TYjIsb2cH2F/HNWw/mZwIieXljSasDVwLci4qGma9gFB58G89dYjscPfnfZ1Wi5Nc6dV3YVWu793z2o7CoMiJsu/kHZVWi5LU47rOwqtC0x4PN85uSfKzacH0lq/XRJ0prAFODXwJdaXSl3u5mZlW0Ax3wiYiYwHdiqdk7ShqRWzz1dPUfS+sCtwPURcXhEtDw8OviYmZWpmGy3icCxkjaQNAKYAEyOiEcbC0raBLgN+FFEHN2iu3wdBx8zs7INfLbbacAvgL8ATwBDgb0AJO0paW5d2WOBtYAjJM2tO/bsxx2+jsd8zMwGuYhYABydj8Zrk4BJdY/3B/Yf6Do5+JiZla0Dl9dx8DEzK1knrmrt4GNmVrYODD5OODAzs8K55WNmVqa+Za0NGg4+ZmYl85iPmZkVrwODj8d8zMyscG75mJmVzN1uZmZWPAcfMzMrVIdmu3nMx8zMCueWj5lZiZSPTuPgY2ZWtg7sdnPwMTMrmbPdzMyseB0YfJxwYGZmhXPLx8ysbG75FEvSxZKu6+L8WEkhaf26c3tI+mPeS/xFSX+StFfD89bPz6sd8yQ9KOl1W8eambWFSGM+zR5VVYluN0kTgIuAnwFvB7YErgEulHRaF08ZB6wBjAa+CXxT0m4FVdfMrG+iH0dFtX23m6StgWOAIyPiO3WXJkia
B5wl6ZqI+HPdteci4qn874skfRbYCriykEqbmVmPqtDy2ROYC5zfxbULgBeB3bt6opJtgE2BPw1YDc3M+qETu93aoeUzTtLchnP1QXE08EhEzG98YkTMkzQN2Ljh0u8kLQSWBoYB34mIa7qrgKTxwHiApVZcqYlbMDPrhwoHkWa1Q/D5HfmDv85bgWv78Zp7APeSAs9bgXMkvRgRJ3RVOCImAhMBhq+5Tgf+b2BmZapyC6ZZ7RB8XoqIh+tPSBpZ9/BBYFtJy0TEvIZyywAbATc3vOY/617zAUkbAV+T9PWIeLm11Tczs76qwpjPj4DlgEO7uHZYvnbFEl5jASnQLt3aqpmZ9VN/Mt0q3GJqh5ZPjyLidknfJmW3LUPqjgvgE8DXgAkNmW4Aq0hanXR/bwM+D9wcEbMLrLqZWe9UOIg0q+2DD0BEHC3pbuCzwFfy6b8BB0XEZV085Yb8cwHwJPAr4PgBr6iZWR8Jj/kULiL26+b8VBq2uMhBpqtAU1/m0cbnmZlZ+6lEy8fMbFBzy8fMzIqm6Lzo4+BjZlamimetNcvBx8ysZJ2YcFCFeT5mZjbIuOVjZla2Dmz5OPiYmZWsE7vdHHzMzMrWgcHHYz5mZlY4Bx8zszL1YyO53nbXSRoq6QxJz0qaI+lqSaN6KD9O0n2S/i3pXkk7tup2axx8zMzKNvCrWh8H7AS8A1g7n+tyuTJJGwLXAN8EVsw/r5W0fp/uaQkcfMzMSlRbWHSAt9EeT9oB4JGImAUcQ9pFer0uyu4L3BERl0fE/IiYBNyZz7eMg4+ZWbWNkjS17lhsZ+i8Oee6wB21cxExDZgNjOni9cbUl83u7KZs05ztZmZWtv6t7TYjIsb2cH2F/HNWw/mZwIhuyndVdrNmKtcdBx8zs5IN8DyfOfnnig3nR5JaP12V723ZprnbzcysTAO8jXZEzASmA1vVzuWkghHAPV085e76stmW+XzLOPiYmQ1+E4FjJW0gaQQwAZicN+BsdCkwVtLukoZJ2h14O3BJKyvk4GNmVjItbP7opdOAXwB/AZ4AhgJ7AUjaU9LcWsGcjLALcAKpq+0E4BPdBKqmeczHzKxsA7y8TkQsAI7OR+O1ScCkhnM3ADcMZJ0cfBosPWsB617XmOhRfbNHr7DkQhWjhYNzQawtTjus7Cq03F3HnV92FQbE0LNb8zpeWNTMzIoV9DfVupI85mNmZoVzy8fMrGTudjMzs+I5+JiZWZFqC4t2Go/5mJlZ4dzyMTMrU0RHZrs5+JiZlawTu90cfMzMytaBwcdjPmZmVji3fMzMSuZuNzMzK1YAg3Sdwp44+JiZla3zYo/HfMzMrHhu+ZiZlcxjPmZmVjxPMjUzs6K55WNmZsUKnHBgZmZWBLd8zMxKlLZU6Lymj4OPmVnZFpZdgeI5+JiZlawTWz4e8zEzs8K1ffCRdLGkkHRhF9cm5GvX5ccnd1VW0vr5/Nii6m1m1ivRz6Oi2j74ZI8Du0parnZC0lLAPsD0hrIvA/tKekuB9TMza1K8tptpM0dFVSX43AM8BOxad+6jpEBzS0PZacBk4LRCamZm1k+K5o+qqkrwAbgQOKDu8QHARXTd8DwO+KikbYuomJmZ9U2Vgs8VwFhJb5a0OjAOuLirghHxN+BS4PTevLCk8ZKmSpr6yqsvtaq+Zma904HdbpVJtY6IFyRdS2rxzARuiYjpkrp7yknAg5J2Ae5cwmtPBCYCjFhuzer+Ns2segLkeT5t74fAJcBcUnDpVkQ8Lukc4Juk8SEzs/ZU4RZMs6rU7QbwG2A+MAr4aS/KfxNYFThoAOtkZmZ9VKmWT0SEpM0BRcS8XpR/QdKpwNcGvnZmZk3qvIZP5Vo+RMSciJjdh6ecAzwzUPUxM+svRTR9VFXbt3wiYr/eXo+Ik4GTG67PA9Zrfc3MzFqkwkGkWW0ffMzMBrWgI1e1rly3m5mZVZ9bPmZmJRLVHrtplls+ZmZla5MVDiQtK+mHkmbm40JJ
b+ih/D6S/iDpBUkzJF0v6W29eS8HHzOzsrVJ8AHOBjYBNgZGA5sCZ/ZQfgXgK8DawFqk1WR+LWnZJb2Rg4+ZmZFbOHsBJ0bE0xHxDHAiaYua4V09JyLOi4gpEfFiziz+GrA6KYD1yMHHzKxMtWy3Zg8YVVsYOR/jm6zJxsBw4I66c3cCbyC1gnrjA8BLpC1weuSEAzOzkvUz4WBGRPS4S7Oki4F9eyjyDdI+aACz6s7X/j1iSZWQNJq0zc1RETFnSeUdfMzMyjbw2W6HA0f3cP0lXmvdrEjaOaD2b4AeV5XJO0dPAb4VEd/tTYUcfMzMBrmImEvaDaBbkv5B2h16K+CmfHpL4N/Agz08byvgBuBrEXFOb+vk4GNmVqr22BQuIv4t6XLgFEn35tOnAJdGxMtdPUfSNsB1wDER8f2+vJ8TDszMyhS0U6r1EaRWTu34B3Bk7aKkL0u6r67810ldc2dJmlt3bLukN3LLx8ysbG2ytltEvEjaLfqAbq6fCpxa93j7Zt/LLR8zMyucWz5mZiXrxLXdHHzMzMrm4GNmZoUKYKGDj5mZFao9Uq2L5oQDMzMrnFs+Dea89OSMKXd+9bGC3m4UMKOQd7qzkHeBIu+pWL6vfhh69kC/w2KK/F2t15JX6cCWj4NPg4hYtaj3kjR1SQsCVs1gvCfwfVVJJe/JwcfMzArVoQkHHvMxM7PCueVTrollV2AADMZ7At9XlVTsngKiTdbXKZCDT4kiomJ/JEs2GO8JfF9VUsl78piPmZkVymM+ZmZmxXDLx8ysbO52MzOzwjn4mJlZsby2m5mZWSHc8jEzK1MACz3Px8zMitaB3W4OPmZmZXPwMTOzYoUnmZqZmRXBLR8zszIFhBcWNTOzwnVgt5uDj5lZ2Tow4cBjPmZmVji3fMzMyhThSaZmZlaCDux2c/AxMytZdGDLx2M+ZmZWOLd8zMxK1ZlbKjj4mJmVKfA8HzMzK0EHrnDgMR8zMyucWz5mZiUKINztZmZmhYroyG43Bx8zs5K55WNmZsXrwJaPEw7MzKxwig6c3GRm1i4k3QCM6sdLzIiIca2qT1EcfMzMrHDudjMzs8I5+JiZWeEcfMzMrHAOPmZmVjgHHzMzK9z/A72EyIwu+m5MAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x432 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Pairwise correlation between the columns of dataTarget (pandas default method).\n",
    "cr = dataTarget.corr()\n",
    "\n",
    "# One tick per column, labelled with the column name, on both axes.\n",
    "tick_positions = range(len(dataTarget.columns))\n",
    "tick_labels = dataTarget.columns\n",
    "\n",
    "f = plt.figure(figsize=(6, 6))\n",
    "# fignum makes matshow draw into the figure created above instead of opening a new one.\n",
    "plt.matshow(cr, fignum=f.number)\n",
    "plt.xticks(tick_positions, tick_labels, fontsize=14, rotation=50)\n",
    "plt.yticks(tick_positions, tick_labels, fontsize=14)\n",
    "\n",
    "# Colour scale legend for the matrix values.\n",
    "cb = plt.colorbar()\n",
    "cb.ax.tick_params(labelsize=13)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "267ba24e",
   "metadata": {},
   "source": [
    "各标签（目标变量）之间无明显相关性"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d88adf0e",
   "metadata": {},
   "source": [
    "### 测试数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "86b67f52",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(50, 6)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SMILES</th>\n",
       "      <th>Caco-2</th>\n",
       "      <th>CYP3A4</th>\n",
       "      <th>hERG</th>\n",
       "      <th>HOB</th>\n",
       "      <th>MN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>COc1cc(OC)cc(\\C=C\\c2ccc(OS(=O)(=O)[C@@H]3C[C@@...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>OC(=O)\\C=C\\c1ccc(cc1)C2=C(CCOc3ccccc23)c4ccc(O...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>COc1ccc2C(=C(CCOc2c1)c3ccc(O)cc3)c4ccc(\\C=C\\C(...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>OC(=O)\\C=C\\c1ccc(cc1)C2=C(CCOc3cc(F)ccc23)c4cc...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>OC(=O)\\C=C\\c1ccc(cc1)C2=C(CCSc3cc(F)ccc23)c4cc...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              SMILES  Caco-2  CYP3A4  hERG  \\\n",
       "0  COc1cc(OC)cc(\\C=C\\c2ccc(OS(=O)(=O)[C@@H]3C[C@@...       0       1     1   \n",
       "1  OC(=O)\\C=C\\c1ccc(cc1)C2=C(CCOc3ccccc23)c4ccc(O...       0       1     0   \n",
       "2  COc1ccc2C(=C(CCOc2c1)c3ccc(O)cc3)c4ccc(\\C=C\\C(...       0       1     1   \n",
       "3  OC(=O)\\C=C\\c1ccc(cc1)C2=C(CCOc3cc(F)ccc23)c4cc...       0       1     1   \n",
       "4  OC(=O)\\C=C\\c1ccc(cc1)C2=C(CCSc3cc(F)ccc23)c4cc...       0       1     1   \n",
       "\n",
       "   HOB  MN  \n",
       "0    0   1  \n",
       "1    0   1  \n",
       "2    0   1  \n",
       "3    0   1  \n",
       "4    0   1  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# ADMET table for the test split: read from the second entry of `sheets`.\n",
    "dataADMET_Test = pd.read_excel(\n",
    "    '../ADMET.xlsx',\n",
    "    sheet_name=sheets[1],\n",
    ")\n",
    "\n",
    "print(dataADMET_Test.shape)\n",
    "dataADMET_Test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "2fb3edd1",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(50, 729)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>nAcid</th>\n",
       "      <th>ALogP</th>\n",
       "      <th>ALogp2</th>\n",
       "      <th>AMR</th>\n",
       "      <th>apol</th>\n",
       "      <th>naAromAtom</th>\n",
       "      <th>nAromBond</th>\n",
       "      <th>nAtom</th>\n",
       "      <th>nHeavyAtom</th>\n",
       "      <th>nH</th>\n",
       "      <th>...</th>\n",
       "      <th>MW</th>\n",
       "      <th>WTPT-1</th>\n",
       "      <th>WTPT-2</th>\n",
       "      <th>WTPT-3</th>\n",
       "      <th>WTPT-4</th>\n",
       "      <th>WTPT-5</th>\n",
       "      <th>WPATH</th>\n",
       "      <th>WPOL</th>\n",
       "      <th>XLogP</th>\n",
       "      <th>Zagreb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>0</td>\n",
       "      <td>0.7918</td>\n",
       "      <td>0.626947</td>\n",
       "      <td>109.7646</td>\n",
       "      <td>62.896583</td>\n",
       "      <td>11</td>\n",
       "      <td>11</td>\n",
       "      <td>57</td>\n",
       "      <td>26</td>\n",
       "      <td>31</td>\n",
       "      <td>...</td>\n",
       "      <td>357.230394</td>\n",
       "      <td>51.435682</td>\n",
       "      <td>1.978295</td>\n",
       "      <td>11.194856</td>\n",
       "      <td>7.887218</td>\n",
       "      <td>3.307637</td>\n",
       "      <td>1549</td>\n",
       "      <td>45</td>\n",
       "      <td>5.309</td>\n",
       "      <td>130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>1</td>\n",
       "      <td>-12.9741</td>\n",
       "      <td>168.327271</td>\n",
       "      <td>338.7341</td>\n",
       "      <td>221.678851</td>\n",
       "      <td>10</td>\n",
       "      <td>10</td>\n",
       "      <td>209</td>\n",
       "      <td>102</td>\n",
       "      <td>107</td>\n",
       "      <td>...</td>\n",
       "      <td>1437.820466</td>\n",
       "      <td>200.252007</td>\n",
       "      <td>1.963255</td>\n",
       "      <td>100.205330</td>\n",
       "      <td>39.991503</td>\n",
       "      <td>60.213826</td>\n",
       "      <td>67587</td>\n",
       "      <td>152</td>\n",
       "      <td>-1.383</td>\n",
       "      <td>490</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>0</td>\n",
       "      <td>1.3775</td>\n",
       "      <td>1.897506</td>\n",
       "      <td>96.8772</td>\n",
       "      <td>54.478618</td>\n",
       "      <td>11</td>\n",
       "      <td>11</td>\n",
       "      <td>47</td>\n",
       "      <td>21</td>\n",
       "      <td>26</td>\n",
       "      <td>...</td>\n",
       "      <td>302.170436</td>\n",
       "      <td>41.709176</td>\n",
       "      <td>1.986151</td>\n",
       "      <td>5.555562</td>\n",
       "      <td>2.515686</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>847</td>\n",
       "      <td>34</td>\n",
       "      <td>5.069</td>\n",
       "      <td>106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>1</td>\n",
       "      <td>-12.8632</td>\n",
       "      <td>165.461914</td>\n",
       "      <td>325.4504</td>\n",
       "      <td>212.760093</td>\n",
       "      <td>10</td>\n",
       "      <td>10</td>\n",
       "      <td>200</td>\n",
       "      <td>99</td>\n",
       "      <td>101</td>\n",
       "      <td>...</td>\n",
       "      <td>1395.762283</td>\n",
       "      <td>194.429683</td>\n",
       "      <td>1.963936</td>\n",
       "      <td>97.319365</td>\n",
       "      <td>42.378404</td>\n",
       "      <td>54.940962</td>\n",
       "      <td>63277</td>\n",
       "      <td>148</td>\n",
       "      <td>-3.044</td>\n",
       "      <td>476</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0</td>\n",
       "      <td>2.9491</td>\n",
       "      <td>8.697191</td>\n",
       "      <td>145.3755</td>\n",
       "      <td>73.773204</td>\n",
       "      <td>18</td>\n",
       "      <td>18</td>\n",
       "      <td>62</td>\n",
       "      <td>34</td>\n",
       "      <td>28</td>\n",
       "      <td>...</td>\n",
       "      <td>457.205322</td>\n",
       "      <td>69.559647</td>\n",
       "      <td>2.045872</td>\n",
       "      <td>13.925208</td>\n",
       "      <td>8.176666</td>\n",
       "      <td>3.197305</td>\n",
       "      <td>3614</td>\n",
       "      <td>59</td>\n",
       "      <td>4.364</td>\n",
       "      <td>176</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 729 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    nAcid    ALogP      ALogp2       AMR        apol  naAromAtom  nAromBond  \\\n",
       "25      0   0.7918    0.626947  109.7646   62.896583          11         11   \n",
       "44      1 -12.9741  168.327271  338.7341  221.678851          10         10   \n",
       "23      0   1.3775    1.897506   96.8772   54.478618          11         11   \n",
       "42      1 -12.8632  165.461914  325.4504  212.760093          10         10   \n",
       "9       0   2.9491    8.697191  145.3755   73.773204          18         18   \n",
       "\n",
       "    nAtom  nHeavyAtom   nH  ...           MW      WTPT-1    WTPT-2  \\\n",
       "25     57          26   31  ...   357.230394   51.435682  1.978295   \n",
       "44    209         102  107  ...  1437.820466  200.252007  1.963255   \n",
       "23     47          21   26  ...   302.170436   41.709176  1.986151   \n",
       "42    200          99  101  ...  1395.762283  194.429683  1.963936   \n",
       "9      62          34   28  ...   457.205322   69.559647  2.045872   \n",
       "\n",
       "        WTPT-3     WTPT-4     WTPT-5  WPATH  WPOL  XLogP  Zagreb  \n",
       "25   11.194856   7.887218   3.307637   1549    45  5.309     130  \n",
       "44  100.205330  39.991503  60.213826  67587   152 -1.383     490  \n",
       "23    5.555562   2.515686   0.000000    847    34  5.069     106  \n",
       "42   97.319365  42.378404  54.940962  63277   148 -3.044     476  \n",
       "9    13.925208   8.176666   3.197305   3614    59  4.364     176  \n",
       "\n",
       "[5 rows x 729 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Molecular descriptors for the test split. The first worksheet column is dropped\n",
    "# (presumably the SMILES identifier - confirm against the workbook).\n",
    "dataMol_Test = pd.read_excel(\n",
    "    '../Molecular_Descriptor.xlsx',\n",
    "    sheet_name=sheets[1],\n",
    ").iloc[:, 1:]\n",
    "\n",
    "print(dataMol_Test.shape)\n",
    "dataMol_Test.sample(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "90d46b2c",
   "metadata": {},
   "source": [
    "## 多标签分类"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "alien-flexibility",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.multioutput import MultiOutputClassifier\n",
    "\n",
    "from sklearn.model_selection import KFold\n",
    "\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.metrics import log_loss\n",
    "\n",
    "# NumPy arrays used from here on: feature matrix, target matrix and the test\n",
    "# descriptors. No hold-out split is made in this cell.\n",
    "X, y = dataFeature.values, dataTarget.values\n",
    "test = dataMol_Test.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "7e483aea",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(50, 729)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Cast the test matrix to float64, then display its (rows, columns) shape.\n",
    "test = test.astype(np.float64)\n",
    "test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "recorded-unknown",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2021-10-18 02:56:14.630164: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lcoal/cuda-10.1/lib64:\n",
      "2021-10-18 02:56:14.630187: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf \n",
    "import tensorflow.keras.backend as K \n",
    "import random \n",
    "from tensorflow.keras.callbacks import ReduceLROnPlateau \n",
    "from sklearn.model_selection import KFold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "alpha-cooperative",
   "metadata": {},
   "outputs": [],
   "source": [
    "def build_model(n_features=None, n_outputs=5, dropout_rate=0.5):\n",
    "    \"\"\"Build and compile the fully connected multi-label classifier.\n",
    "\n",
    "    Layer stack, in order: BatchNormalization, Dense(1024, relu),\n",
    "    BatchNormalization, Dropout, Dense(2048, relu), BatchNormalization,\n",
    "    Dropout, Dense(n_outputs, sigmoid). Every output unit has its own sigmoid,\n",
    "    so each label gets an independent probability. The model is compiled with\n",
    "    the Adam optimizer and binary cross-entropy loss.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    n_features : int, optional\n",
    "        Width of the input layer. When omitted, ``X.shape[1]`` of the\n",
    "        notebook-level training matrix is used, looked up at call time.\n",
    "    n_outputs : int, default 5\n",
    "        Number of sigmoid output units.\n",
    "    dropout_rate : float, default 0.5\n",
    "        Rate shared by both Dropout layers.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tf.keras.Sequential\n",
    "        A newly created, compiled model.\n",
    "    \"\"\"\n",
    "    if n_features is None:\n",
    "        # Backward-compatible default: size the input from the global training matrix.\n",
    "        n_features = X.shape[1]\n",
    "\n",
    "    layers = tf.keras.layers\n",
    "    model = tf.keras.Sequential()\n",
    "    model.add(layers.Input(shape=(n_features,)))\n",
    "\n",
    "    # First hidden block: the inputs are normalised only, no dropout is applied to them.\n",
    "    model.add(layers.BatchNormalization())\n",
    "    model.add(layers.Dense(units=1024, activation='relu'))\n",
    "\n",
    "    # Second hidden block.\n",
    "    model.add(layers.BatchNormalization())\n",
    "    model.add(layers.Dropout(dropout_rate))\n",
    "    model.add(layers.Dense(units=2048, activation='relu'))\n",
    "\n",
    "    # Output head: one sigmoid unit per label.\n",
    "    model.add(layers.BatchNormalization())\n",
    "    model.add(layers.Dropout(dropout_rate))\n",
    "    model.add(layers.Dense(n_outputs, activation=\"sigmoid\"))\n",
    "\n",
    "    model.compile(\n",
    "        optimizer='adam',\n",
    "        loss='binary_crossentropy',\n",
    "        metrics=[\"accuracy\", \"binary_crossentropy\"],\n",
    "    )\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "hydraulic-elements",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Zero-filled prediction buffers with 5 columns each: `pred` has one row per row\n",
    "# of X, `test_pred` one row per row of dataMol_Test.\n",
    "pred = np.zeros(shape=(len(X), 5))\n",
    "test_pred = np.zeros(shape=(len(dataMol_Test), 5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "c9de7e12",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same float64 cast as in the earlier cell that displays test.shape; repeating it\n",
    "# leaves the values unchanged.\n",
    "test = test.astype(np.float64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "combined-cartoon",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fold number:  1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2021-10-18 02:56:15.456845: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
      "2021-10-18 02:56:15.457521: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lcoal/cuda-10.1/lib64:\n",
      "2021-10-18 02:56:15.457625: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lcoal/cuda-10.1/lib64:\n",
      "2021-10-18 02:56:15.457715: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lcoal/cuda-10.1/lib64:\n",
      "2021-10-18 02:56:15.459330: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcusolver.so.11'; dlerror: libcusolver.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lcoal/cuda-10.1/lib64:\n",
      "2021-10-18 02:56:15.459401: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcusparse.so.11'; dlerror: libcusparse.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lcoal/cuda-10.1/lib64:\n",
      "2021-10-18 02:56:15.459528: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1835] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.\n",
      "Skipping registering GPU devices...\n",
      "2021-10-18 02:56:15.459865: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA\n",
      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2021-10-18 02:56:15.539420: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.\n",
      "\n",
      "Epoch 00026: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.\n",
      "\n",
      "Epoch 00029: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.\n",
      "\n",
      "Epoch 00032: ReduceLROnPlateau reducing learning rate to 1.0000001111620805e-07.\n",
      "\n",
      "Epoch 00035: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-08.\n",
      "train [0.09163211286067963, 0.42004504799842834, 0.09163211286067963]\n",
      "val [0.35415974259376526, 0.20202019810676575, 0.35415974259376526]\n",
      "test_pred ---\n",
      "Fold number:  2\n",
      "\n",
      "Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.\n",
      "\n",
      "Epoch 00035: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.\n",
      "train [0.1038832738995552, 0.3699324429035187, 0.1038832738995552]\n",
      "val [0.20796866714954376, 0.3585858643054962, 0.20796866714954376]\n",
      "test_pred ---\n",
      "Fold number:  3\n",
      "\n",
      "Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.\n",
      "\n",
      "Epoch 00029: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.\n",
      "\n",
      "Epoch 00032: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.\n",
      "\n",
      "Epoch 00035: ReduceLROnPlateau reducing learning rate to 1.0000001111620805e-07.\n",
      "train [0.09000195562839508, 0.3355855941772461, 0.09000195562839508]\n",
      "val [0.6022710800170898, 0.24242424964904785, 0.6022710800170898]\n",
      "test_pred ---\n",
      "Fold number:  4\n",
      "\n",
      "Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.\n",
      "\n",
      "Epoch 00030: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.\n",
      "\n",
      "Epoch 00033: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.\n",
      "train [0.10393238812685013, 0.3755630552768707, 0.10393238812685013]\n",
      "val [0.33218860626220703, 0.5707070827484131, 0.33218860626220703]\n",
      "test_pred ---\n",
      "Fold number:  5\n",
      "\n",
      "Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.\n",
      "\n",
      "Epoch 00034: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.\n",
      "train [0.09082002937793732, 0.4366910457611084, 0.09082002937793732]\n",
      "val [0.2882075011730194, 0.3350253701210022, 0.2882075011730194]\n",
      "WARNING:tensorflow:5 out of the last 13 calls to <function Model.make_predict_function.<locals>.predict_function at 0x7f72280cb550> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.\n",
      "test_pred ---\n",
      "Fold number:  6\n",
      "\n",
      "Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.\n",
      "\n",
      "Epoch 00031: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.\n",
      "\n",
      "Epoch 00034: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.\n",
      "train [0.07819149643182755, 0.4552616775035858, 0.07819149643182755]\n",
      "val [0.5251790285110474, 0.3857868015766144, 0.5251790285110474]\n",
      "WARNING:tensorflow:5 out of the last 13 calls to <function Model.make_predict_function.<locals>.predict_function at 0x7f720058c0d0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.\n",
      "test_pred ---\n",
      "Fold number:  7\n",
      "\n",
      "Epoch 00026: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.\n",
      "\n",
      "Epoch 00029: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.\n",
      "\n",
      "Epoch 00032: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.\n",
      "\n",
      "Epoch 00035: ReduceLROnPlateau reducing learning rate to 1.0000001111620805e-07.\n",
      "train [0.06243530660867691, 0.45469892024993896, 0.06243530660867691]\n",
      "val [0.499088317155838, 0.23350253701210022, 0.499088317155838]\n",
      "test_pred ---\n",
      "Fold number:  8\n",
      "\n",
      "Epoch 00030: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.\n",
      "\n",
      "Epoch 00033: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.\n",
      "train [0.053858596831560135, 0.456949919462204, 0.053858596831560135]\n",
      "val [0.555937647819519, 0.5532994866371155, 0.555937647819519]\n",
      "test_pred ---\n",
      "Fold number:  9\n",
      "\n",
      "Epoch 00021: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.\n",
      "train [0.06678131222724915, 0.4378165304660797, 0.06678131222724915]\n",
      "val [0.23726500570774078, 0.44162437319755554, 0.23726500570774078]\n",
      "test_pred ---\n",
      "Fold number:  10\n",
      "\n",
      "Epoch 00025: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.\n",
      "train [0.06202450022101402, 0.3939223289489746, 0.06202450022101402]\n",
      "val [0.2597181797027588, 0.4060913622379303, 0.2597181797027588]\n",
      "test_pred ---\n"
     ]
    }
   ],
   "source": [
    "n_split = 10 \n",
    "kfoldnumber= 0 \n",
    "\n",
    "for train_index, validation_index in KFold(n_split).split(X):\n",
    "    kfoldnumber += 1 \n",
    "    print('Fold number: ',kfoldnumber) \n",
    "    \n",
    "    reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, min_delta=1e-4, mode='min')\n",
    "    \n",
    "    net = build_model()\n",
    "    net.fit(X[train_index], y[train_index], batch_size=128, epochs=35, \n",
    "            validation_data=(X[validation_index], y[validation_index]), verbose=0, callbacks=[reduce_lr_loss])\n",
    "    \n",
    "    print(\"train\", net.evaluate(X[train_index], y[train_index], verbose=0, batch_size=128))\n",
    "    print(\"val\", net.evaluate(X[validation_index], y[validation_index], verbose=0, batch_size=128))\n",
    "\n",
    "    pred[validation_index] = net.predict(X[validation_index], batch_size=128, verbose=0)\n",
    "    \n",
    "    print('test_pred ---')\n",
    "    test_pred += net.predict(test, batch_size=128, verbose=0) / n_split \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "hourly-strike",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(50, 5)"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_pred.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "b04437cb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Caco-2', 'CYP3A4', 'hERG', 'HOB', 'MN']"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataADMET_Test.columns[1:].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "stylish-crest",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Caco-2</th>\n",
       "      <th>CYP3A4</th>\n",
       "      <th>hERG</th>\n",
       "      <th>HOB</th>\n",
       "      <th>MN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Caco-2  CYP3A4  hERG  HOB   MN\n",
       "0      0.0     1.0   1.0  0.0  1.0\n",
       "1      0.0     1.0   0.0  0.0  1.0\n",
       "2      0.0     1.0   1.0  0.0  1.0\n",
       "3      0.0     1.0   1.0  0.0  1.0\n",
       "4      0.0     1.0   1.0  0.0  1.0\n",
       "5      0.0     1.0   1.0  0.0  1.0\n",
       "6      0.0     1.0   1.0  0.0  1.0\n",
       "7      0.0     1.0   1.0  0.0  1.0\n",
       "8      0.0     1.0   1.0  0.0  1.0\n",
       "9      0.0     1.0   1.0  0.0  1.0\n",
       "10     0.0     1.0   1.0  0.0  1.0\n",
       "11     0.0     1.0   1.0  0.0  1.0\n",
       "12     0.0     1.0   1.0  0.0  1.0\n",
       "13     0.0     1.0   1.0  0.0  1.0\n",
       "14     0.0     1.0   1.0  0.0  1.0\n",
       "15     0.0     1.0   1.0  0.0  1.0\n",
       "16     0.0     1.0   1.0  0.0  1.0\n",
       "17     0.0     1.0   1.0  0.0  1.0\n",
       "18     0.0     1.0   0.0  0.0  1.0\n",
       "19     0.0     0.0   0.0  0.0  1.0\n",
       "20     0.0     1.0   1.0  0.0  1.0\n",
       "21     0.0     1.0   1.0  0.0  1.0\n",
       "22     1.0     0.0   1.0  0.0  0.0\n",
       "23     1.0     0.0   1.0  0.0  0.0\n",
       "24     1.0     1.0   1.0  0.0  0.0\n",
       "25     1.0     1.0   1.0  0.0  0.0\n",
       "26     0.0     1.0   1.0  0.0  0.0\n",
       "27     0.0     1.0   1.0  0.0  1.0\n",
       "28     0.0     1.0   1.0  0.0  1.0\n",
       "29     0.0     1.0   1.0  1.0  1.0\n",
       "30     1.0     1.0   1.0  1.0  1.0\n",
       "31     1.0     1.0   1.0  1.0  0.0\n",
       "32     1.0     1.0   1.0  1.0  1.0\n",
       "33     0.0     1.0   1.0  1.0  1.0\n",
       "34     0.0     1.0   1.0  1.0  1.0\n",
       "35     0.0     1.0   1.0  0.0  1.0\n",
       "36     0.0     1.0   1.0  0.0  1.0\n",
       "37     0.0     1.0   1.0  0.0  0.0\n",
       "38     0.0     1.0   0.0  0.0  1.0\n",
       "39     0.0     1.0   0.0  0.0  1.0\n",
       "40     0.0     1.0   0.0  0.0  1.0\n",
       "41     0.0     1.0   0.0  0.0  1.0\n",
       "42     0.0     1.0   0.0  0.0  1.0\n",
       "43     0.0     1.0   0.0  0.0  1.0\n",
       "44     0.0     1.0   0.0  0.0  1.0\n",
       "45     0.0     1.0   1.0  0.0  1.0\n",
       "46     0.0     1.0   1.0  0.0  1.0\n",
       "47     0.0     1.0   1.0  0.0  1.0\n",
       "48     0.0     1.0   1.0  0.0  1.0\n",
       "49     0.0     1.0   1.0  0.0  0.0"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_pred_df = pd.DataFrame(data=test_pred, columns=dataADMET_Test.columns[1:].tolist())\n",
    "test_pred_df = test_pred_df.round() # 取整，写入为整数值，无 .0\n",
    "test_pred_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "incomplete-unknown",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_pred_df.to_excel('../ADMET.xlsx_test.xlsx', sheet_name=sheets[1])"
   ]
  },
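  {
   "cell_type": "markdown",
   "id": "d328f4b8",
   "metadata": {},
   "source": [
    "#### `sklearn.metrics`库中暂不支持[多类多输出分类任务的评价标准](https://scikit-learn.org/stable/modules/multiclass.html#multiclass-multioutput-classification)，这里暂时不做模型评估。注意：该限制针对的是“多类多输出”任务；若 5 个标签均为 0/1 二值（即多标签 multilabel 任务），则可以直接使用 `sklearn.metrics` 中的 `hamming_loss`、`f1_score(average='micro')` 等指标，对上面的折外预测 `pred` 进行评估。"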
   ]
  },
  {
   "cell_type": "markdown",
   "id": "50badd21",
   "metadata": {},
   "source": [
    "### RandomForestClassifier\n",
    "- [sklearn - Multilabel classification](https://scikit-learn.org/stable/modules/multiclass.html#multilabel-classification)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "c6368949",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.multioutput import MultiOutputClassifier\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.utils import shuffle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "32d6130b",
   "metadata": {},
   "outputs": [],
   "source": [
    "n_samples, n_features = X.shape\n",
    "\n",
    "n_outputs = y.shape[1]  # 5 类\n",
    "\n",
    "n_classes = 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "9d060d60",
   "metadata": {},
   "outputs": [],
   "source": [
    "forest = RandomForestClassifier(random_state=666)\n",
    "\n",
    "multi_target_forest = MultiOutputClassifier(forest, n_jobs=-1)\n",
    "\n",
    "model = multi_target_forest.fit(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "c5325375",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 0, 0, 1],\n",
       "       [0, 0, 0, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [1, 0, 0, 0, 0],\n",
       "       [1, 0, 0, 0, 0],\n",
       "       [1, 1, 1, 0, 0],\n",
       "       [1, 1, 1, 0, 0],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 1, 1],\n",
       "       [0, 1, 1, 1, 1],\n",
       "       [1, 1, 1, 1, 1],\n",
       "       [0, 1, 1, 1, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 0, 0, 1],\n",
       "       [0, 1, 0, 0, 1],\n",
       "       [0, 1, 1, 0, 0],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 1],\n",
       "       [0, 1, 1, 0, 0]])"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.predict(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "be4a495f",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "tf2",
   "language": "python",
   "name": "tf2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
